summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Zimmermann <tzimmermann@suse.de>2024-10-25 15:43:37 +0200
committerThomas Zimmermann <tzimmermann@suse.de>2024-10-25 15:43:37 +0200
commit12837c251571546470056761ee0c7b944c89edc3 (patch)
tree1bc9f3933ad713077636bae8522f41f43078c861
parent8ccf06f174811c962af31157c247cffbd9317f0a (diff)
2024y-10m-25d-13h-42m-32s UTC: drm-tip rerere cache update
git version 2.46.0
-rw-r--r--rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/postimage.21780
-rw-r--r--rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage1784
-rw-r--r--rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.11784
-rw-r--r--rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.21784
-rw-r--r--rr-cache/641dfaef393e85daf275c6725aed0c69ef5046ad/preimage1089
5 files changed, 1089 insertions, 7132 deletions
diff --git a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/postimage.2 b/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/postimage.2
deleted file mode 100644
index aec7db39c061..000000000000
--- a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/postimage.2
+++ /dev/null
@@ -1,1780 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#include "xe_lrc.h"
-
-#include <generated/xe_wa_oob.h>
-
-#include <linux/ascii85.h>
-
-#include "instructions/xe_mi_commands.h"
-#include "instructions/xe_gfxpipe_commands.h"
-#include "instructions/xe_gfx_state_commands.h"
-#include "regs/xe_engine_regs.h"
-#include "regs/xe_lrc_layout.h"
-#include "xe_bb.h"
-#include "xe_bo.h"
-#include "xe_device.h"
-#include "xe_drm_client.h"
-#include "xe_exec_queue_types.h"
-#include "xe_gt.h"
-#include "xe_gt_printk.h"
-#include "xe_hw_fence.h"
-#include "xe_map.h"
-#include "xe_memirq.h"
-#include "xe_sriov.h"
-#include "xe_vm.h"
-#include "xe_wa.h"
-
-#define LRC_VALID BIT_ULL(0)
-#define LRC_PRIVILEGE BIT_ULL(8)
-#define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3)
-#define LRC_LEGACY_64B_CONTEXT 3
-
-#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61)
-#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
-
-#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
-
-struct xe_lrc_snapshot {
- struct xe_bo *lrc_bo;
- void *lrc_snapshot;
- unsigned long lrc_size, lrc_offset;
-
- u32 context_desc;
- u32 indirect_context_desc;
- u32 head;
- struct {
- u32 internal;
- u32 memory;
- } tail;
- u32 start_seqno;
- u32 seqno;
- u32 ctx_timestamp;
- u32 ctx_job_timestamp;
-};
-
-static struct xe_device *
-lrc_to_xe(struct xe_lrc *lrc)
-{
- return gt_to_xe(lrc->fence_ctx.gt);
-}
-
-size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
-{
- struct xe_device *xe = gt_to_xe(gt);
- size_t size;
-
- switch (class) {
- case XE_ENGINE_CLASS_RENDER:
- if (GRAPHICS_VER(xe) >= 20)
- size = 4 * SZ_4K;
- else
- size = 14 * SZ_4K;
- break;
- case XE_ENGINE_CLASS_COMPUTE:
- /* 14 pages since graphics_ver == 11 */
- if (GRAPHICS_VER(xe) >= 20)
- size = 3 * SZ_4K;
- else
- size = 14 * SZ_4K;
- break;
- default:
- WARN(1, "Unknown engine class: %d", class);
- fallthrough;
- case XE_ENGINE_CLASS_COPY:
- case XE_ENGINE_CLASS_VIDEO_DECODE:
- case XE_ENGINE_CLASS_VIDEO_ENHANCE:
- case XE_ENGINE_CLASS_OTHER:
- size = 2 * SZ_4K;
- }
-
- /* Add indirect ring state page */
- if (xe_gt_has_indirect_ring_state(gt))
- size += LRC_INDIRECT_RING_STATE_SIZE;
-
- return size;
-}
-
-/*
- * The per-platform tables are u8-encoded in @data. Decode @data and set the
- * addresses' offset and commands in @regs. The following encoding is used
- * for each byte. There are 2 steps: decoding commands and decoding addresses.
- *
- * Commands:
- * [7]: create NOPs - number of NOPs are set in lower bits
- * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
- * MI_LRI_FORCE_POSTED
- * [5:0]: Number of NOPs or registers to set values to in case of
- * MI_LOAD_REGISTER_IMM
- *
- * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
- * number of registers. They are set by using the REG/REG16 macros: the former
- * is used for offsets smaller than 0x200 while the latter is for values bigger
- * than that. Those macros already set all the bits documented below correctly:
- *
- * [7]: When a register offset needs more than 6 bits, use additional bytes, to
- * follow, for the lower bits
- * [6:0]: Register offset, without considering the engine base.
- *
- * This function only tweaks the commands and register offsets. Values are not
- * filled out.
- */
-static void set_offsets(u32 *regs,
- const u8 *data,
- const struct xe_hw_engine *hwe)
-#define NOP(x) (BIT(7) | (x))
-#define LRI(count, flags) ((flags) << 6 | (count) | \
- BUILD_BUG_ON_ZERO(count >= BIT(6)))
-#define POSTED BIT(0)
-#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
-#define REG16(x) \
- (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
- (((x) >> 2) & 0x7f)
-{
- const u32 base = hwe->mmio_base;
-
- while (*data) {
- u8 count, flags;
-
- if (*data & BIT(7)) { /* skip */
- count = *data++ & ~BIT(7);
- regs += count;
- continue;
- }
-
- count = *data & 0x3f;
- flags = *data >> 6;
- data++;
-
- *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
- if (flags & POSTED)
- *regs |= MI_LRI_FORCE_POSTED;
- *regs |= MI_LRI_LRM_CS_MMIO;
- regs++;
-
- xe_gt_assert(hwe->gt, count);
- do {
- u32 offset = 0;
- u8 v;
-
- do {
- v = *data++;
- offset <<= 7;
- offset |= v & ~BIT(7);
- } while (v & BIT(7));
-
- regs[0] = base + (offset << 2);
- regs += 2;
- } while (--count);
- }
-
- *regs = MI_BATCH_BUFFER_END | BIT(0);
-}
-
-static const u8 gen12_xcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- 0
-};
-
-static const u8 dg2_xcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- REG(0x120),
- REG(0x124),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- 0
-};
-
-static const u8 gen12_rcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
- NOP(3 + 9 + 1),
-
- LRI(51, POSTED),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG(0x028),
- REG(0x09c),
- REG(0x0c0),
- REG(0x178),
- REG(0x17c),
- REG16(0x358),
- REG(0x170),
- REG(0x150),
- REG(0x154),
- REG(0x158),
- REG16(0x41c),
- REG16(0x600),
- REG16(0x604),
- REG16(0x608),
- REG16(0x60c),
- REG16(0x610),
- REG16(0x614),
- REG16(0x618),
- REG16(0x61c),
- REG16(0x620),
- REG16(0x624),
- REG16(0x628),
- REG16(0x62c),
- REG16(0x630),
- REG16(0x634),
- REG16(0x638),
- REG16(0x63c),
- REG16(0x640),
- REG16(0x644),
- REG16(0x648),
- REG16(0x64c),
- REG16(0x650),
- REG16(0x654),
- REG16(0x658),
- REG16(0x65c),
- REG16(0x660),
- REG16(0x664),
- REG16(0x668),
- REG16(0x66c),
- REG16(0x670),
- REG16(0x674),
- REG16(0x678),
- REG16(0x67c),
- REG(0x068),
- REG(0x084),
- NOP(1),
-
- 0
-};
-
-static const u8 xehp_rcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- 0
-};
-
-static const u8 dg2_rcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- REG(0x120),
- REG(0x124),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- 0
-};
-
-static const u8 mtl_rcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- REG(0x120),
- REG(0x124),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- NOP(2),
- LRI(2, POSTED),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- 0
-};
-
-#define XE2_CTX_COMMON \
- NOP(1), /* [0x00] */ \
- LRI(15, POSTED), /* [0x01] */ \
- REG16(0x244), /* [0x02] CTXT_SR_CTL */ \
- REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \
- REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \
- REG(0x038), /* [0x08] RING_BUFFER_START */ \
- REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \
- REG(0x168), /* [0x0c] BB_ADDR_UDW */ \
- REG(0x140), /* [0x0e] BB_ADDR */ \
- REG(0x110), /* [0x10] BB_STATE */ \
- REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \
- REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \
- REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
- REG(0x180), /* [0x18] CCID */ \
- REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \
- REG(0x120), /* [0x1c] PRT_BB_STATE */ \
- REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \
- \
- NOP(1), /* [0x20] */ \
- LRI(9, POSTED), /* [0x21] */ \
- REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \
- REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \
- REG(0x108), /* [0x26] INDIRECT_RING_STATE */ \
- REG16(0x284), /* [0x28] dummy reg */ \
- REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \
- REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \
- REG16(0x278), /* [0x2e] CS_CTX_ASID */ \
- REG16(0x274), /* [0x30] PTBP_UDW */ \
- REG16(0x270) /* [0x32] PTBP_LDW */
-
-static const u8 xe2_rcs_offsets[] = {
- XE2_CTX_COMMON,
-
- NOP(2), /* [0x34] */
- LRI(2, POSTED), /* [0x36] */
- REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
- REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */
-
- NOP(6), /* [0x41] */
- LRI(1, 0), /* [0x47] */
- REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */
-
- 0
-};
-
-static const u8 xe2_bcs_offsets[] = {
- XE2_CTX_COMMON,
-
- NOP(4 + 8 + 1), /* [0x34] */
- LRI(2, POSTED), /* [0x41] */
- REG16(0x200), /* [0x42] BCS_SWCTRL */
- REG16(0x204), /* [0x44] BLIT_CCTL */
-
- 0
-};
-
-static const u8 xe2_xcs_offsets[] = {
- XE2_CTX_COMMON,
-
- 0
-};
-
-static const u8 xe2_indirect_ring_state_offsets[] = {
- NOP(1), /* [0x00] */
- LRI(5, POSTED), /* [0x01] */
- REG(0x034), /* [0x02] RING_BUFFER_HEAD */
- REG(0x030), /* [0x04] RING_BUFFER_TAIL */
- REG(0x038), /* [0x06] RING_BUFFER_START */
- REG(0x048), /* [0x08] RING_BUFFER_START_UDW */
- REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */
-
- NOP(5), /* [0x0c] */
- LRI(9, POSTED), /* [0x11] */
- REG(0x168), /* [0x12] BB_ADDR_UDW */
- REG(0x140), /* [0x14] BB_ADDR */
- REG(0x110), /* [0x16] BB_STATE */
- REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */
-
- NOP(12), /* [0x00] */
-
- 0
-};
-
-#undef REG16
-#undef REG
-#undef LRI
-#undef NOP
-
-static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
-{
- if (class == XE_ENGINE_CLASS_RENDER) {
- if (GRAPHICS_VER(xe) >= 20)
- return xe2_rcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1270)
- return mtl_rcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1255)
- return dg2_rcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1250)
- return xehp_rcs_offsets;
- else
- return gen12_rcs_offsets;
- } else if (class == XE_ENGINE_CLASS_COPY) {
- if (GRAPHICS_VER(xe) >= 20)
- return xe2_bcs_offsets;
- else
- return gen12_xcs_offsets;
- } else {
- if (GRAPHICS_VER(xe) >= 20)
- return xe2_xcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1255)
- return dg2_xcs_offsets;
- else
- return gen12_xcs_offsets;
- }
-}
-
-static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
-{
- regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
- CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
-
- if (xe_gt_has_indirect_ring_state(hwe->gt))
- regs[CTX_CONTEXT_CONTROL] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);
-
- /* TODO: Timestamp */
-}
-
-static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
-{
- struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
- struct xe_device *xe = gt_to_xe(hwe->gt);
-
- if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
- return;
-
- regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
- MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
- regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
- regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
-
- regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
- MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
- regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
- regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
- regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
- regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
-}
-
-static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
-{
- struct xe_device *xe = gt_to_xe(hwe->gt);
-
- if (GRAPHICS_VERx100(xe) >= 1250)
- return 0x70;
- else
- return 0x60;
-}
-
-static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
-{
- int x;
-
- x = lrc_ring_mi_mode(hwe);
- regs[x + 1] &= ~STOP_RING;
- regs[x + 1] |= STOP_RING << 16;
-}
-
-static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
-{
- return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
-}
-
-static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
-{
- return 0;
-}
-
-u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
-{
- return lrc->ring.size;
-}
-
-/* Make the magic macros work */
-#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
-#define __xe_lrc_regs_offset xe_lrc_regs_offset
-
-#define LRC_SEQNO_PPHWSP_OFFSET 512
-#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
-#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
-#define LRC_PARALLEL_PPHWSP_OFFSET 2048
-#define LRC_PPHWSP_SIZE SZ_4K
-
-u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
-{
- return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
-}
-
-static size_t lrc_reg_size(struct xe_device *xe)
-{
- if (GRAPHICS_VERx100(xe) >= 1250)
- return 96 * sizeof(u32);
- else
- return 80 * sizeof(u32);
-}
-
-size_t xe_lrc_skip_size(struct xe_device *xe)
-{
- return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
-}
-
-static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
-{
- /* The seqno is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
-}
-
-static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
-{
- /* The start seqno is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
-}
-
-static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
-{
- /* The start seqno is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
-}
-
-static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
-{
- /* The parallel is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
-}
-
-static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
-{
- return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
-}
-
-static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
-{
- /* Indirect ring state page is at the very end of LRC */
- return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
-}
-
-#define DECL_MAP_ADDR_HELPERS(elem) \
-static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
-{ \
- struct iosys_map map = lrc->bo->vmap; \
-\
- xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
- iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
- return map; \
-} \
-static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
-{ \
- return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
-} \
-
-DECL_MAP_ADDR_HELPERS(ring)
-DECL_MAP_ADDR_HELPERS(pphwsp)
-DECL_MAP_ADDR_HELPERS(seqno)
-DECL_MAP_ADDR_HELPERS(regs)
-DECL_MAP_ADDR_HELPERS(start_seqno)
-DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
-DECL_MAP_ADDR_HELPERS(ctx_timestamp)
-DECL_MAP_ADDR_HELPERS(parallel)
-DECL_MAP_ADDR_HELPERS(indirect_ring)
-
-#undef DECL_MAP_ADDR_HELPERS
-
-/**
- * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp GGTT address
- */
-u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
-}
-
-/**
- * xe_lrc_ctx_timestamp() - Read ctx timestamp value
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp value
- */
-u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_ctx_timestamp_map(lrc);
- return xe_map_read32(xe, &map);
-}
-
-/**
- * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp job GGTT address
- */
-u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
-}
-
-/**
- * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp job value
- */
-u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_ctx_job_timestamp_map(lrc);
- return xe_map_read32(xe, &map);
-}
-
-u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_pphwsp_ggtt_addr(lrc);
-}
-
-u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
-{
- if (!xe_lrc_has_indirect_ring_state(lrc))
- return 0;
-
- return __xe_lrc_indirect_ring_ggtt_addr(lrc);
-}
-
-static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_indirect_ring_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- return xe_map_read32(xe, &map);
-}
-
-static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
- int reg_nr, u32 val)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_indirect_ring_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- xe_map_write32(xe, &map, val);
-}
-
-u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_regs_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- return xe_map_read32(xe, &map);
-}
-
-void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_regs_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- xe_map_write32(xe, &map, val);
-}
-
-static void *empty_lrc_data(struct xe_hw_engine *hwe)
-{
- struct xe_gt *gt = hwe->gt;
- void *data;
- u32 *regs;
-
- data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
- if (!data)
- return NULL;
-
- /* 1st page: Per-Process of HW status Page */
- regs = data + LRC_PPHWSP_SIZE;
- set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
- set_context_control(regs, hwe);
- set_memory_based_intr(regs, hwe);
- reset_stop_ring(regs, hwe);
- if (xe_gt_has_indirect_ring_state(gt)) {
- regs = data + xe_gt_lrc_size(gt, hwe->class) -
- LRC_INDIRECT_RING_STATE_SIZE;
- set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
- }
-
- return data;
-}
-
-static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
-{
- u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
-
- xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
- xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
-}
-
-static void xe_lrc_finish(struct xe_lrc *lrc)
-{
- xe_hw_fence_ctx_finish(&lrc->fence_ctx);
- xe_bo_lock(lrc->bo, false);
- xe_bo_unpin(lrc->bo);
- xe_bo_unlock(lrc->bo);
- xe_bo_put(lrc->bo);
-}
-
-#define PVC_CTX_ASID (0x2e + 1)
-#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
-
-static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
- struct xe_vm *vm, u32 ring_size)
-{
- struct xe_gt *gt = hwe->gt;
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_device *xe = gt_to_xe(gt);
- struct iosys_map map;
- void *init_data = NULL;
- u32 arb_enable;
- u32 lrc_size;
- int err;
-
- kref_init(&lrc->refcount);
- lrc->flags = 0;
- lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
- if (xe_gt_has_indirect_ring_state(gt))
- lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
-
- /*
- * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
- * via VM bind calls.
- */
- lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
- if (IS_ERR(lrc->bo))
- return PTR_ERR(lrc->bo);
-
- lrc->size = lrc_size;
- lrc->tile = gt_to_tile(hwe->gt);
- lrc->ring.size = ring_size;
- lrc->ring.tail = 0;
- lrc->ctx_timestamp = 0;
-
- xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
- hwe->fence_irq, hwe->name);
-
- if (!gt->default_lrc[hwe->class]) {
- init_data = empty_lrc_data(hwe);
- if (!init_data) {
- err = -ENOMEM;
- goto err_lrc_finish;
- }
- }
-
- /*
- * Init Per-Process of HW status Page, LRC / context state to known
- * values
- */
- map = __xe_lrc_pphwsp_map(lrc);
- if (!init_data) {
- xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
- xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
- gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
- xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
- } else {
- xe_map_memcpy_to(xe, &map, 0, init_data,
- xe_gt_lrc_size(gt, hwe->class));
- kfree(init_data);
- }
-
- if (vm) {
- xe_lrc_set_ppgtt(lrc, vm);
-
- if (vm->xef)
- xe_drm_client_add_bo(vm->xef->client, lrc->bo);
- }
-
- if (xe_gt_has_indirect_ring_state(gt)) {
- xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
- __xe_lrc_indirect_ring_ggtt_addr(lrc));
-
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
- __xe_lrc_ring_ggtt_addr(lrc));
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
- RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
- } else {
- xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
- xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
- xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
- xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
- RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
- }
-
- xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
-
- if (xe->info.has_asid && vm)
- xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
-
- lrc->desc = LRC_VALID;
- lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
- /* TODO: Priority */
-
- /* While this appears to have something about privileged batches or
- * some such, it really just means PPGTT mode.
- */
- if (vm)
- lrc->desc |= LRC_PRIVILEGE;
-
- if (GRAPHICS_VERx100(xe) < 1250) {
- lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
- lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
- }
-
- arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
-
- map = __xe_lrc_seqno_map(lrc);
- xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
-
- map = __xe_lrc_start_seqno_map(lrc);
- xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
-
- return 0;
-
-err_lrc_finish:
- xe_lrc_finish(lrc);
- return err;
-}
-
-/**
- * xe_lrc_create - Create a LRC
- * @hwe: Hardware Engine
- * @vm: The VM (address space)
- * @ring_size: LRC ring size
- *
- * Allocate and initialize the Logical Ring Context (LRC).
- *
- * Return pointer to created LRC upon success and an error pointer
- * upon failure.
- */
-struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
- u32 ring_size)
-{
- struct xe_lrc *lrc;
- int err;
-
- lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
- if (!lrc)
- return ERR_PTR(-ENOMEM);
-
- err = xe_lrc_init(lrc, hwe, vm, ring_size);
- if (err) {
- kfree(lrc);
- return ERR_PTR(err);
- }
-
- return lrc;
-}
-
-/**
- * xe_lrc_destroy - Destroy the LRC
- * @ref: reference to LRC
- *
- * Called when ref == 0, release resources held by the Logical Ring Context
- * (LRC) and free the LRC memory.
- */
-void xe_lrc_destroy(struct kref *ref)
-{
- struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);
-
- xe_lrc_finish(lrc);
- kfree(lrc);
-}
-
-void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
- else
- xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
-}
-
-u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
- else
- return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
-}
-
-void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
- else
- xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
-}
-
-u32 xe_lrc_ring_head(struct xe_lrc *lrc)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
- else
- return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
-}
-
-u32 xe_lrc_ring_space(struct xe_lrc *lrc)
-{
- const u32 head = xe_lrc_ring_head(lrc);
- const u32 tail = lrc->ring.tail;
- const u32 size = lrc->ring.size;
-
- return ((head - tail - 1) & (size - 1)) + 1;
-}
-
-static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
- const void *data, size_t size)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
-
- iosys_map_incr(&ring, lrc->ring.tail);
- xe_map_memcpy_to(xe, &ring, 0, data, size);
- lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
-}
-
-void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map ring;
- u32 rhs;
- size_t aligned_size;
-
- xe_assert(xe, IS_ALIGNED(size, 4));
- aligned_size = ALIGN(size, 8);
-
- ring = __xe_lrc_ring_map(lrc);
-
- xe_assert(xe, lrc->ring.tail < lrc->ring.size);
- rhs = lrc->ring.size - lrc->ring.tail;
- if (size > rhs) {
- __xe_lrc_write_ring(lrc, ring, data, rhs);
- __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
- } else {
- __xe_lrc_write_ring(lrc, ring, data, size);
- }
-
- if (aligned_size > size) {
- u32 noop = MI_NOOP;
-
- __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
- }
-}
-
-u64 xe_lrc_descriptor(struct xe_lrc *lrc)
-{
- return lrc->desc | xe_lrc_ggtt_addr(lrc);
-}
-
-u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_seqno_ggtt_addr(lrc);
-}
-
-/**
- * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
- *
- * Allocate but don't initialize an lrc seqno fence.
- *
- * Return: Pointer to the allocated fence or
- * negative error pointer on error.
- */
-struct dma_fence *xe_lrc_alloc_seqno_fence(void)
-{
- return xe_hw_fence_alloc();
-}
-
-/**
- * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
- * @fence: Pointer to the fence to free.
- *
- * Frees an lrc seqno fence that hasn't yet been
- * initialized.
- */
-void xe_lrc_free_seqno_fence(struct dma_fence *fence)
-{
- xe_hw_fence_free(fence);
-}
-
-/**
- * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
- * @lrc: Pointer to the lrc.
- * @fence: Pointer to the fence to initialize.
- *
- * Initializes a pre-allocated lrc seqno fence.
- * After initialization, the fence is subject to normal
- * dma-fence refcounting.
- */
-void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
-{
- xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
-}
-
-s32 xe_lrc_seqno(struct xe_lrc *lrc)
-{
- struct iosys_map map = __xe_lrc_seqno_map(lrc);
-
- return xe_map_read32(lrc_to_xe(lrc), &map);
-}
-
-s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
-{
- struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
-
- return xe_map_read32(lrc_to_xe(lrc), &map);
-}
-
-u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_start_seqno_ggtt_addr(lrc);
-}
-
-u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_parallel_ggtt_addr(lrc);
-}
-
-struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
-{
- return __xe_lrc_parallel_map(lrc);
-}
-
-static int instr_dw(u32 cmd_header)
-{
- /* GFXPIPE "SINGLE_DW" opcodes are a single dword */
- if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
- GFXPIPE_SINGLE_DW_CMD(0, 0))
- return 1;
-
- /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
- if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
- return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;
-
- /* Most instructions have the # of dwords (minus 2) in 7:0 */
- return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
-}
-
-static int dump_mi_command(struct drm_printer *p,
- struct xe_gt *gt,
- u32 *dw,
- int remaining_dw)
-{
- u32 inst_header = *dw;
- u32 numdw = instr_dw(inst_header);
- u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
- int num_noop;
-
- /* First check for commands that don't have/use a '# DW' field */
- switch (inst_header & MI_OPCODE) {
- case MI_NOOP:
- num_noop = 1;
- while (num_noop < remaining_dw &&
- (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
- num_noop++;
- drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
- return num_noop;
-
- case MI_TOPOLOGY_FILTER:
- drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
- return 1;
-
- case MI_BATCH_BUFFER_END:
- drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
- /* Return 'remaining_dw' to consume the rest of the LRC */
- return remaining_dw;
- }
-
- /*
- * Any remaining commands include a # of dwords. We should make sure
- * it doesn't exceed the remaining size of the LRC.
- */
- if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
- numdw = remaining_dw;
-
- switch (inst_header & MI_OPCODE) {
- case MI_LOAD_REGISTER_IMM:
- drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
- inst_header, (numdw - 1) / 2);
- for (int i = 1; i < numdw; i += 2)
- drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
- return numdw;
-
- case MI_LOAD_REGISTER_MEM & MI_OPCODE:
- drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
- inst_header,
- dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
- dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
- if (numdw == 4)
- drm_printf(p, " - %#6x = %#010llx\n",
- dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
- else
- drm_printf(p, " - %*ph (%s)\n",
- (int)sizeof(u32) * (numdw - 1), dw + 1,
- numdw < 4 ? "truncated" : "malformed");
- return numdw;
-
- case MI_FORCE_WAKEUP:
- drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
- return numdw;
-
- default:
- drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
- inst_header, opcode, numdw);
- return numdw;
- }
-}
-
-static int dump_gfxpipe_command(struct drm_printer *p,
- struct xe_gt *gt,
- u32 *dw,
- int remaining_dw)
-{
- u32 numdw = instr_dw(*dw);
- u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
- u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
- u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);
-
- /*
- * Make sure we haven't mis-parsed a number of dwords that exceeds the
- * remaining size of the LRC.
- */
- if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
- numdw = remaining_dw;
-
- switch (*dw & GFXPIPE_MATCH_MASK) {
-#define MATCH(cmd) \
- case cmd: \
- drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
- return numdw
-#define MATCH3D(cmd) \
- case CMD_##cmd: \
- drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
- return numdw
-
- MATCH(STATE_BASE_ADDRESS);
- MATCH(STATE_SIP);
- MATCH(GPGPU_CSR_BASE_ADDRESS);
- MATCH(STATE_COMPUTE_MODE);
- MATCH3D(3DSTATE_BTD);
- MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
- MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);
-
- MATCH3D(3DSTATE_VF_STATISTICS);
-
- MATCH(PIPELINE_SELECT);
-
- MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
- MATCH3D(3DSTATE_CLEAR_PARAMS);
- MATCH3D(3DSTATE_DEPTH_BUFFER);
- MATCH3D(3DSTATE_STENCIL_BUFFER);
- MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
- MATCH3D(3DSTATE_VERTEX_BUFFERS);
- MATCH3D(3DSTATE_VERTEX_ELEMENTS);
- MATCH3D(3DSTATE_INDEX_BUFFER);
- MATCH3D(3DSTATE_VF);
- MATCH3D(3DSTATE_MULTISAMPLE);
- MATCH3D(3DSTATE_CC_STATE_POINTERS);
- MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
- MATCH3D(3DSTATE_VS);
- MATCH3D(3DSTATE_GS);
- MATCH3D(3DSTATE_CLIP);
- MATCH3D(3DSTATE_SF);
- MATCH3D(3DSTATE_WM);
- MATCH3D(3DSTATE_CONSTANT_VS);
- MATCH3D(3DSTATE_CONSTANT_GS);
- MATCH3D(3DSTATE_CONSTANT_PS);
- MATCH3D(3DSTATE_SAMPLE_MASK);
- MATCH3D(3DSTATE_CONSTANT_HS);
- MATCH3D(3DSTATE_CONSTANT_DS);
- MATCH3D(3DSTATE_HS);
- MATCH3D(3DSTATE_TE);
- MATCH3D(3DSTATE_DS);
- MATCH3D(3DSTATE_STREAMOUT);
- MATCH3D(3DSTATE_SBE);
- MATCH3D(3DSTATE_PS);
- MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
- MATCH3D(3DSTATE_CPS_POINTERS);
- MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
- MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
- MATCH3D(3DSTATE_VF_INSTANCING);
- MATCH3D(3DSTATE_VF_SGVS);
- MATCH3D(3DSTATE_VF_TOPOLOGY);
- MATCH3D(3DSTATE_WM_CHROMAKEY);
- MATCH3D(3DSTATE_PS_BLEND);
- MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
- MATCH3D(3DSTATE_PS_EXTRA);
- MATCH3D(3DSTATE_RASTER);
- MATCH3D(3DSTATE_SBE_SWIZ);
- MATCH3D(3DSTATE_WM_HZ_OP);
- MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
- MATCH3D(3DSTATE_VF_SGVS_2);
- MATCH3D(3DSTATE_VFG);
- MATCH3D(3DSTATE_URB_ALLOC_VS);
- MATCH3D(3DSTATE_URB_ALLOC_HS);
- MATCH3D(3DSTATE_URB_ALLOC_DS);
- MATCH3D(3DSTATE_URB_ALLOC_GS);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
- MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
- MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
- MATCH3D(3DSTATE_AMFS);
- MATCH3D(3DSTATE_DEPTH_BOUNDS);
- MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
- MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
- MATCH3D(3DSTATE_MESH_CONTROL);
- MATCH3D(3DSTATE_MESH_DISTRIB);
- MATCH3D(3DSTATE_TASK_REDISTRIB);
- MATCH3D(3DSTATE_MESH_SHADER);
- MATCH3D(3DSTATE_MESH_SHADER_DATA);
- MATCH3D(3DSTATE_TASK_CONTROL);
- MATCH3D(3DSTATE_TASK_SHADER);
- MATCH3D(3DSTATE_TASK_SHADER_DATA);
- MATCH3D(3DSTATE_URB_ALLOC_MESH);
- MATCH3D(3DSTATE_URB_ALLOC_TASK);
- MATCH3D(3DSTATE_CLIP_MESH);
- MATCH3D(3DSTATE_SBE_MESH);
- MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
-
- MATCH3D(3DSTATE_DRAWING_RECTANGLE);
- MATCH3D(3DSTATE_CHROMA_KEY);
- MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
- MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
- MATCH3D(3DSTATE_LINE_STIPPLE);
- MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
- MATCH3D(3DSTATE_MONOFILTER_SIZE);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
- MATCH3D(3DSTATE_SO_DECL_LIST);
- MATCH3D(3DSTATE_SO_BUFFER);
- MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
- MATCH3D(3DSTATE_SAMPLE_PATTERN);
- MATCH3D(3DSTATE_3D_MODE);
- MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
- MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
- MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);
-
- default:
- drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
- *dw, pipeline, opcode, subopcode, numdw);
- return numdw;
- }
-}
-
-static int dump_gfx_state_command(struct drm_printer *p,
- struct xe_gt *gt,
- u32 *dw,
- int remaining_dw)
-{
- u32 numdw = instr_dw(*dw);
- u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);
-
- /*
- * Make sure we haven't mis-parsed a number of dwords that exceeds the
- * remaining size of the LRC.
- */
- if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
- numdw = remaining_dw;
-
- switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
- MATCH(STATE_WRITE_INLINE);
-
- default:
- drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
- *dw, opcode, numdw);
- return numdw;
- }
-}
-
-void xe_lrc_dump_default(struct drm_printer *p,
- struct xe_gt *gt,
- enum xe_engine_class hwe_class)
-{
- u32 *dw;
- int remaining_dw, num_dw;
-
- if (!gt->default_lrc[hwe_class]) {
- drm_printf(p, "No default LRC for class %d\n", hwe_class);
- return;
- }
-
- /*
- * Skip the beginning of the LRC since it contains the per-process
- * hardware status page.
- */
- dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
- remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;
-
- while (remaining_dw > 0) {
- if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
- num_dw = dump_mi_command(p, gt, dw, remaining_dw);
- } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
- num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
- } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
- num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
- } else {
- num_dw = min(instr_dw(*dw), remaining_dw);
- drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
- *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
- num_dw);
- }
-
- dw += num_dw;
- remaining_dw -= num_dw;
- }
-}
-
-struct instr_state {
- u32 instr;
- u16 num_dw;
-};
-
-static const struct instr_state xe_hpg_svg_state[] = {
- { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
- { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
- { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
- { .instr = CMD_3DSTATE_VS, .num_dw = 9 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
- { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
- { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
- { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SF, .num_dw = 4 },
- { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
- { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
- { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
- { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
- { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
- { .instr = CMD_3DSTATE_HS, .num_dw = 9 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
- { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
- { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
- { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
- { .instr = CMD_3DSTATE_TE, .num_dw = 5 },
- { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
- { .instr = CMD_3DSTATE_DS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_GS, .num_dw = 10 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
- { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
- { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
- { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
- { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
-};
-
-void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
-{
- struct xe_gt *gt = q->hwe->gt;
- struct xe_device *xe = gt_to_xe(gt);
- const struct instr_state *state_table = NULL;
- int state_table_size = 0;
-
- /*
- * Wa_14019789679
- *
- * If the driver doesn't explicitly emit the SVG instructions while
- * setting up the default LRC, the context switch will write 0's
- * (noops) into the LRC memory rather than the expected instruction
- * headers. Application contexts start out as a copy of the default
- * LRC, and if they also do not emit specific settings for some SVG
- * state, then on context restore they'll unintentionally inherit
- * whatever state setting the previous context had programmed into the
- * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
- * prevent the hardware from resetting that state back to any specific
- * value).
- *
- * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
- * since that's a specific state setting that can easily cause GPU
- * hangs if unintentionally inherited. However to be safe we'll
- * continue to emit all of the SVG state since it's best not to leak
- * any of the state between contexts, even if that leakage is harmless.
- */
- if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
- state_table = xe_hpg_svg_state;
- state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
- }
-
- if (!state_table) {
- xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
- GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
- return;
- }
-
- for (int i = 0; i < state_table_size; i++) {
- u32 instr = state_table[i].instr;
- u16 num_dw = state_table[i].num_dw;
- bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
-
- xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
- xe_gt_assert(gt, num_dw != 0);
- xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
-
- /*
- * Xe2's SVG context is the same as the one on DG2 / MTL
- * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
- * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
- * Just make the replacement here rather than defining a
- * whole separate table for the single trivial change.
- */
- if (GRAPHICS_VER(xe) >= 20 &&
- instr == CMD_3DSTATE_DRAWING_RECTANGLE)
- instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
-
- bb->cs[bb->len] = instr;
- if (!is_single_dw)
- bb->cs[bb->len] |= (num_dw - 2);
-
- bb->len += num_dw;
- }
-}
-
-struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
-{
- struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);
-
- if (!snapshot)
- return NULL;
-
- if (lrc->bo->vm)
- xe_vm_get(lrc->bo->vm);
-
- snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
- snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
- snapshot->head = xe_lrc_ring_head(lrc);
- snapshot->tail.internal = lrc->ring.tail;
- snapshot->tail.memory = xe_lrc_ring_tail(lrc);
- snapshot->start_seqno = xe_lrc_start_seqno(lrc);
- snapshot->seqno = xe_lrc_seqno(lrc);
- snapshot->lrc_bo = xe_bo_get(lrc->bo);
- snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
- snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
- snapshot->lrc_snapshot = NULL;
- snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
- snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
- return snapshot;
-}
-
-void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
-{
- struct xe_bo *bo;
- struct xe_vm *vm;
- struct iosys_map src;
-
- if (!snapshot)
- return;
-
- bo = snapshot->lrc_bo;
- vm = bo->vm;
- snapshot->lrc_bo = NULL;
-
- snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
- if (!snapshot->lrc_snapshot)
- goto put_bo;
-
- xe_bo_lock(bo, false);
- if (!ttm_bo_vmap(&bo->ttm, &src)) {
- xe_map_memcpy_from(xe_bo_device(bo),
- snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
- snapshot->lrc_size);
- ttm_bo_vunmap(&bo->ttm, &src);
- } else {
- kvfree(snapshot->lrc_snapshot);
- snapshot->lrc_snapshot = NULL;
- }
- xe_bo_unlock(bo);
-put_bo:
- xe_bo_put(bo);
- if (vm)
- xe_vm_put(vm);
-}
-
-void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
-{
- unsigned long i;
-
- if (!snapshot)
- return;
-
- drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
- drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
- snapshot->indirect_context_desc);
- drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
- drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
- snapshot->tail.internal, snapshot->tail.memory);
- drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
- drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
- drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
- drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);
-
- if (!snapshot->lrc_snapshot)
- return;
-
- drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
- drm_puts(p, "\t[HWSP].data: ");
- for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
- u32 *val = snapshot->lrc_snapshot + i;
- char dumped[ASCII85_BUFSZ];
-
- drm_puts(p, ascii85_encode(*val, dumped));
- }
-
- drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
- drm_puts(p, "\t[HWCTX].data: ");
- for (; i < snapshot->lrc_size; i += sizeof(u32)) {
- u32 *val = snapshot->lrc_snapshot + i;
- char dumped[ASCII85_BUFSZ];
-
- drm_puts(p, ascii85_encode(*val, dumped));
- }
- drm_puts(p, "\n");
-}
-
-void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
-{
- if (!snapshot)
- return;
-
- kvfree(snapshot->lrc_snapshot);
- if (snapshot->lrc_bo) {
- struct xe_vm *vm;
-
- vm = snapshot->lrc_bo->vm;
- xe_bo_put(snapshot->lrc_bo);
- if (vm)
- xe_vm_put(vm);
- }
- kfree(snapshot);
-}
-
-/**
- * xe_lrc_update_timestamp() - Update ctx timestamp
- * @lrc: Pointer to the lrc.
- * @old_ts: Old timestamp value
- *
- * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and
- * update saved value.
- *
- * Returns: New ctx timestamp value
- */
-u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
-{
- *old_ts = lrc->ctx_timestamp;
-
- lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
-
- return lrc->ctx_timestamp;
-}
diff --git a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage b/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage
deleted file mode 100644
index bee934c9371f..000000000000
--- a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage
+++ /dev/null
@@ -1,1784 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#include "xe_lrc.h"
-
-#include <generated/xe_wa_oob.h>
-
-#include <linux/ascii85.h>
-
-#include "instructions/xe_mi_commands.h"
-#include "instructions/xe_gfxpipe_commands.h"
-#include "instructions/xe_gfx_state_commands.h"
-#include "regs/xe_engine_regs.h"
-#include "regs/xe_lrc_layout.h"
-#include "xe_bb.h"
-#include "xe_bo.h"
-#include "xe_device.h"
-#include "xe_drm_client.h"
-#include "xe_exec_queue_types.h"
-#include "xe_gt.h"
-#include "xe_gt_printk.h"
-#include "xe_hw_fence.h"
-#include "xe_map.h"
-#include "xe_memirq.h"
-#include "xe_sriov.h"
-#include "xe_vm.h"
-#include "xe_wa.h"
-
-#define LRC_VALID BIT_ULL(0)
-#define LRC_PRIVILEGE BIT_ULL(8)
-#define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3)
-#define LRC_LEGACY_64B_CONTEXT 3
-
-#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61)
-#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
-
-#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
-
-struct xe_lrc_snapshot {
- struct xe_bo *lrc_bo;
- void *lrc_snapshot;
- unsigned long lrc_size, lrc_offset;
-
- u32 context_desc;
- u32 indirect_context_desc;
- u32 head;
- struct {
- u32 internal;
- u32 memory;
- } tail;
- u32 start_seqno;
- u32 seqno;
- u32 ctx_timestamp;
- u32 ctx_job_timestamp;
-};
-
-static struct xe_device *
-lrc_to_xe(struct xe_lrc *lrc)
-{
- return gt_to_xe(lrc->fence_ctx.gt);
-}
-
-size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
-{
- struct xe_device *xe = gt_to_xe(gt);
- size_t size;
-
- switch (class) {
- case XE_ENGINE_CLASS_RENDER:
- if (GRAPHICS_VER(xe) >= 20)
- size = 4 * SZ_4K;
- else
- size = 14 * SZ_4K;
- break;
- case XE_ENGINE_CLASS_COMPUTE:
- /* 14 pages since graphics_ver == 11 */
- if (GRAPHICS_VER(xe) >= 20)
- size = 3 * SZ_4K;
- else
- size = 14 * SZ_4K;
- break;
- default:
- WARN(1, "Unknown engine class: %d", class);
- fallthrough;
- case XE_ENGINE_CLASS_COPY:
- case XE_ENGINE_CLASS_VIDEO_DECODE:
- case XE_ENGINE_CLASS_VIDEO_ENHANCE:
- case XE_ENGINE_CLASS_OTHER:
- size = 2 * SZ_4K;
- }
-
- /* Add indirect ring state page */
- if (xe_gt_has_indirect_ring_state(gt))
- size += LRC_INDIRECT_RING_STATE_SIZE;
-
- return size;
-}
-
-/*
- * The per-platform tables are u8-encoded in @data. Decode @data and set the
- * addresses' offset and commands in @regs. The following encoding is used
- * for each byte. There are 2 steps: decoding commands and decoding addresses.
- *
- * Commands:
- * [7]: create NOPs - number of NOPs are set in lower bits
- * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
- * MI_LRI_FORCE_POSTED
- * [5:0]: Number of NOPs or registers to set values to in case of
- * MI_LOAD_REGISTER_IMM
- *
- * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
- * number of registers. They are set by using the REG/REG16 macros: the former
- * is used for offsets smaller than 0x200 while the latter is for values bigger
- * than that. Those macros already set all the bits documented below correctly:
- *
- * [7]: When a register offset needs more than 6 bits, use additional bytes, to
- * follow, for the lower bits
- * [6:0]: Register offset, without considering the engine base.
- *
- * This function only tweaks the commands and register offsets. Values are not
- * filled out.
- */
-static void set_offsets(u32 *regs,
- const u8 *data,
- const struct xe_hw_engine *hwe)
-#define NOP(x) (BIT(7) | (x))
-#define LRI(count, flags) ((flags) << 6 | (count) | \
- BUILD_BUG_ON_ZERO(count >= BIT(6)))
-#define POSTED BIT(0)
-#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
-#define REG16(x) \
- (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
- (((x) >> 2) & 0x7f)
-{
- const u32 base = hwe->mmio_base;
-
- while (*data) {
- u8 count, flags;
-
- if (*data & BIT(7)) { /* skip */
- count = *data++ & ~BIT(7);
- regs += count;
- continue;
- }
-
- count = *data & 0x3f;
- flags = *data >> 6;
- data++;
-
- *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
- if (flags & POSTED)
- *regs |= MI_LRI_FORCE_POSTED;
- *regs |= MI_LRI_LRM_CS_MMIO;
- regs++;
-
- xe_gt_assert(hwe->gt, count);
- do {
- u32 offset = 0;
- u8 v;
-
- do {
- v = *data++;
- offset <<= 7;
- offset |= v & ~BIT(7);
- } while (v & BIT(7));
-
- regs[0] = base + (offset << 2);
- regs += 2;
- } while (--count);
- }
-
- *regs = MI_BATCH_BUFFER_END | BIT(0);
-}
-
-static const u8 gen12_xcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- 0
-};
-
-static const u8 dg2_xcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- REG(0x120),
- REG(0x124),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- 0
-};
-
-static const u8 gen12_rcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
- NOP(3 + 9 + 1),
-
- LRI(51, POSTED),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG(0x028),
- REG(0x09c),
- REG(0x0c0),
- REG(0x178),
- REG(0x17c),
- REG16(0x358),
- REG(0x170),
- REG(0x150),
- REG(0x154),
- REG(0x158),
- REG16(0x41c),
- REG16(0x600),
- REG16(0x604),
- REG16(0x608),
- REG16(0x60c),
- REG16(0x610),
- REG16(0x614),
- REG16(0x618),
- REG16(0x61c),
- REG16(0x620),
- REG16(0x624),
- REG16(0x628),
- REG16(0x62c),
- REG16(0x630),
- REG16(0x634),
- REG16(0x638),
- REG16(0x63c),
- REG16(0x640),
- REG16(0x644),
- REG16(0x648),
- REG16(0x64c),
- REG16(0x650),
- REG16(0x654),
- REG16(0x658),
- REG16(0x65c),
- REG16(0x660),
- REG16(0x664),
- REG16(0x668),
- REG16(0x66c),
- REG16(0x670),
- REG16(0x674),
- REG16(0x678),
- REG16(0x67c),
- REG(0x068),
- REG(0x084),
- NOP(1),
-
- 0
-};
-
-static const u8 xehp_rcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- 0
-};
-
-static const u8 dg2_rcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- REG(0x120),
- REG(0x124),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- 0
-};
-
-static const u8 mtl_rcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- REG(0x120),
- REG(0x124),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- NOP(2),
- LRI(2, POSTED),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- 0
-};
-
-#define XE2_CTX_COMMON \
- NOP(1), /* [0x00] */ \
- LRI(15, POSTED), /* [0x01] */ \
- REG16(0x244), /* [0x02] CTXT_SR_CTL */ \
- REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \
- REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \
- REG(0x038), /* [0x08] RING_BUFFER_START */ \
- REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \
- REG(0x168), /* [0x0c] BB_ADDR_UDW */ \
- REG(0x140), /* [0x0e] BB_ADDR */ \
- REG(0x110), /* [0x10] BB_STATE */ \
- REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \
- REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \
- REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
- REG(0x180), /* [0x18] CCID */ \
- REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \
- REG(0x120), /* [0x1c] PRT_BB_STATE */ \
- REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \
- \
- NOP(1), /* [0x20] */ \
- LRI(9, POSTED), /* [0x21] */ \
- REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \
- REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \
- REG(0x108), /* [0x26] INDIRECT_RING_STATE */ \
- REG16(0x284), /* [0x28] dummy reg */ \
- REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \
- REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \
- REG16(0x278), /* [0x2e] CS_CTX_ASID */ \
- REG16(0x274), /* [0x30] PTBP_UDW */ \
- REG16(0x270) /* [0x32] PTBP_LDW */
-
-static const u8 xe2_rcs_offsets[] = {
- XE2_CTX_COMMON,
-
- NOP(2), /* [0x34] */
- LRI(2, POSTED), /* [0x36] */
- REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
- REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */
-
- NOP(6), /* [0x41] */
- LRI(1, 0), /* [0x47] */
- REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */
-
- 0
-};
-
-static const u8 xe2_bcs_offsets[] = {
- XE2_CTX_COMMON,
-
- NOP(4 + 8 + 1), /* [0x34] */
- LRI(2, POSTED), /* [0x41] */
- REG16(0x200), /* [0x42] BCS_SWCTRL */
- REG16(0x204), /* [0x44] BLIT_CCTL */
-
- 0
-};
-
-static const u8 xe2_xcs_offsets[] = {
- XE2_CTX_COMMON,
-
- 0
-};
-
-static const u8 xe2_indirect_ring_state_offsets[] = {
- NOP(1), /* [0x00] */
- LRI(5, POSTED), /* [0x01] */
- REG(0x034), /* [0x02] RING_BUFFER_HEAD */
- REG(0x030), /* [0x04] RING_BUFFER_TAIL */
- REG(0x038), /* [0x06] RING_BUFFER_START */
- REG(0x048), /* [0x08] RING_BUFFER_START_UDW */
- REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */
-
- NOP(5), /* [0x0c] */
- LRI(9, POSTED), /* [0x11] */
- REG(0x168), /* [0x12] BB_ADDR_UDW */
- REG(0x140), /* [0x14] BB_ADDR */
- REG(0x110), /* [0x16] BB_STATE */
- REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */
-
- NOP(12), /* [0x00] */
-
- 0
-};
-
-#undef REG16
-#undef REG
-#undef LRI
-#undef NOP
-
-static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
-{
- if (class == XE_ENGINE_CLASS_RENDER) {
- if (GRAPHICS_VER(xe) >= 20)
- return xe2_rcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1270)
- return mtl_rcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1255)
- return dg2_rcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1250)
- return xehp_rcs_offsets;
- else
- return gen12_rcs_offsets;
- } else if (class == XE_ENGINE_CLASS_COPY) {
- if (GRAPHICS_VER(xe) >= 20)
- return xe2_bcs_offsets;
- else
- return gen12_xcs_offsets;
- } else {
- if (GRAPHICS_VER(xe) >= 20)
- return xe2_xcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1255)
- return dg2_xcs_offsets;
- else
- return gen12_xcs_offsets;
- }
-}
-
-static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
-{
- regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
- CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
-
- if (xe_gt_has_indirect_ring_state(hwe->gt))
- regs[CTX_CONTEXT_CONTROL] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);
-
- /* TODO: Timestamp */
-}
-
-static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
-{
- struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
- struct xe_device *xe = gt_to_xe(hwe->gt);
-
- if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
- return;
-
- regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
- MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
- regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
- regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
-
- regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
- MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
- regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
- regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
- regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
- regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
-}
-
-static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
-{
- struct xe_device *xe = gt_to_xe(hwe->gt);
-
- if (GRAPHICS_VERx100(xe) >= 1250)
- return 0x70;
- else
- return 0x60;
-}
-
-static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
-{
- int x;
-
- x = lrc_ring_mi_mode(hwe);
- regs[x + 1] &= ~STOP_RING;
- regs[x + 1] |= STOP_RING << 16;
-}
-
-static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
-{
- return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
-}
-
-static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
-{
- return 0;
-}
-
-u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
-{
- return lrc->ring.size;
-}
-
-/* Make the magic macros work */
-#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
-#define __xe_lrc_regs_offset xe_lrc_regs_offset
-
-#define LRC_SEQNO_PPHWSP_OFFSET 512
-#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
-#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
-#define LRC_PARALLEL_PPHWSP_OFFSET 2048
-#define LRC_PPHWSP_SIZE SZ_4K
-
-u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
-{
- return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
-}
-
-static size_t lrc_reg_size(struct xe_device *xe)
-{
- if (GRAPHICS_VERx100(xe) >= 1250)
- return 96 * sizeof(u32);
- else
- return 80 * sizeof(u32);
-}
-
-size_t xe_lrc_skip_size(struct xe_device *xe)
-{
- return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
-}
-
-static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
-{
- /* The seqno is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
-}
-
-static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
-{
- /* The start seqno is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
-}
-
-static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
-{
- /* The start seqno is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
-}
-
-static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
-{
- /* The parallel is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
-}
-
-static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
-{
- return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
-}
-
-static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
-{
- /* Indirect ring state page is at the very end of LRC */
- return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
-}
-
-#define DECL_MAP_ADDR_HELPERS(elem) \
-static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
-{ \
- struct iosys_map map = lrc->bo->vmap; \
-\
- xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
- iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
- return map; \
-} \
-static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
-{ \
- return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
-} \
-
-DECL_MAP_ADDR_HELPERS(ring)
-DECL_MAP_ADDR_HELPERS(pphwsp)
-DECL_MAP_ADDR_HELPERS(seqno)
-DECL_MAP_ADDR_HELPERS(regs)
-DECL_MAP_ADDR_HELPERS(start_seqno)
-DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
-DECL_MAP_ADDR_HELPERS(ctx_timestamp)
-DECL_MAP_ADDR_HELPERS(parallel)
-DECL_MAP_ADDR_HELPERS(indirect_ring)
-
-#undef DECL_MAP_ADDR_HELPERS
-
-/**
- * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp GGTT address
- */
-u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
-}
-
-/**
- * xe_lrc_ctx_timestamp() - Read ctx timestamp value
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp value
- */
-u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_ctx_timestamp_map(lrc);
- return xe_map_read32(xe, &map);
-}
-
-/**
- * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp job GGTT address
- */
-u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
-}
-
-/**
- * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp job value
- */
-u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_ctx_job_timestamp_map(lrc);
- return xe_map_read32(xe, &map);
-}
-
-u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_pphwsp_ggtt_addr(lrc);
-}
-
-u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
-{
- if (!xe_lrc_has_indirect_ring_state(lrc))
- return 0;
-
- return __xe_lrc_indirect_ring_ggtt_addr(lrc);
-}
-
-static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_indirect_ring_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- return xe_map_read32(xe, &map);
-}
-
-static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
- int reg_nr, u32 val)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_indirect_ring_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- xe_map_write32(xe, &map, val);
-}
-
-u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_regs_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- return xe_map_read32(xe, &map);
-}
-
-void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_regs_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- xe_map_write32(xe, &map, val);
-}
-
-static void *empty_lrc_data(struct xe_hw_engine *hwe)
-{
- struct xe_gt *gt = hwe->gt;
- void *data;
- u32 *regs;
-
- data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
- if (!data)
- return NULL;
-
- /* 1st page: Per-Process of HW status Page */
- regs = data + LRC_PPHWSP_SIZE;
- set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
- set_context_control(regs, hwe);
- set_memory_based_intr(regs, hwe);
- reset_stop_ring(regs, hwe);
- if (xe_gt_has_indirect_ring_state(gt)) {
- regs = data + xe_gt_lrc_size(gt, hwe->class) -
- LRC_INDIRECT_RING_STATE_SIZE;
- set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
- }
-
- return data;
-}
-
-static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
-{
- u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
-
- xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
- xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
-}
-
-static void xe_lrc_finish(struct xe_lrc *lrc)
-{
- xe_hw_fence_ctx_finish(&lrc->fence_ctx);
- xe_bo_lock(lrc->bo, false);
- xe_bo_unpin(lrc->bo);
- xe_bo_unlock(lrc->bo);
- xe_bo_put(lrc->bo);
-}
-
-#define PVC_CTX_ASID (0x2e + 1)
-#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
-
-static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
- struct xe_vm *vm, u32 ring_size)
-{
- struct xe_gt *gt = hwe->gt;
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_device *xe = gt_to_xe(gt);
- struct iosys_map map;
- void *init_data = NULL;
- u32 arb_enable;
- u32 lrc_size;
- int err;
-
- kref_init(&lrc->refcount);
- lrc->flags = 0;
- lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
- if (xe_gt_has_indirect_ring_state(gt))
- lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
-
- /*
- * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
- * via VM bind calls.
- */
- lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
- if (IS_ERR(lrc->bo))
- return PTR_ERR(lrc->bo);
-
- lrc->size = lrc_size;
- lrc->tile = gt_to_tile(hwe->gt);
- lrc->ring.size = ring_size;
- lrc->ring.tail = 0;
- lrc->ctx_timestamp = 0;
-
- xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
- hwe->fence_irq, hwe->name);
-
- if (!gt->default_lrc[hwe->class]) {
- init_data = empty_lrc_data(hwe);
- if (!init_data) {
- err = -ENOMEM;
- goto err_lrc_finish;
- }
- }
-
- /*
- * Init Per-Process of HW status Page, LRC / context state to known
- * values
- */
- map = __xe_lrc_pphwsp_map(lrc);
- if (!init_data) {
- xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
- xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
- gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
- xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
- } else {
- xe_map_memcpy_to(xe, &map, 0, init_data,
- xe_gt_lrc_size(gt, hwe->class));
- kfree(init_data);
- }
-
- if (vm) {
- xe_lrc_set_ppgtt(lrc, vm);
-
- if (vm->xef)
- xe_drm_client_add_bo(vm->xef->client, lrc->bo);
- }
-
- if (xe_gt_has_indirect_ring_state(gt)) {
- xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
- __xe_lrc_indirect_ring_ggtt_addr(lrc));
-
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
- __xe_lrc_ring_ggtt_addr(lrc));
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
- RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
- } else {
- xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
- xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
- xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
- xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
- RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
- }
-
- xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
-
- if (xe->info.has_asid && vm)
- xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
-
- lrc->desc = LRC_VALID;
- lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
- /* TODO: Priority */
-
- /* While this appears to have something about privileged batches or
- * some such, it really just means PPGTT mode.
- */
- if (vm)
- lrc->desc |= LRC_PRIVILEGE;
-
- if (GRAPHICS_VERx100(xe) < 1250) {
- lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
- lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
- }
-
- arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
-
- map = __xe_lrc_seqno_map(lrc);
- xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
-
- map = __xe_lrc_start_seqno_map(lrc);
- xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
-
- return 0;
-
-err_lrc_finish:
- xe_lrc_finish(lrc);
- return err;
-}
-
-/**
- * xe_lrc_create - Create a LRC
- * @hwe: Hardware Engine
- * @vm: The VM (address space)
- * @ring_size: LRC ring size
- *
- * Allocate and initialize the Logical Ring Context (LRC).
- *
- * Return pointer to created LRC upon success and an error pointer
- * upon failure.
- */
-struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
- u32 ring_size)
-{
- struct xe_lrc *lrc;
- int err;
-
- lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
- if (!lrc)
- return ERR_PTR(-ENOMEM);
-
- err = xe_lrc_init(lrc, hwe, vm, ring_size);
- if (err) {
- kfree(lrc);
- return ERR_PTR(err);
- }
-
- return lrc;
-}
-
-/**
- * xe_lrc_destroy - Destroy the LRC
- * @ref: reference to LRC
- *
- * Called when ref == 0, release resources held by the Logical Ring Context
- * (LRC) and free the LRC memory.
- */
-void xe_lrc_destroy(struct kref *ref)
-{
- struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);
-
- xe_lrc_finish(lrc);
- kfree(lrc);
-}
-
-void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
- else
- xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
-}
-
-u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
- else
- return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
-}
-
-void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
- else
- xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
-}
-
-u32 xe_lrc_ring_head(struct xe_lrc *lrc)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
- else
- return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
-}
-
-u32 xe_lrc_ring_space(struct xe_lrc *lrc)
-{
- const u32 head = xe_lrc_ring_head(lrc);
- const u32 tail = lrc->ring.tail;
- const u32 size = lrc->ring.size;
-
- return ((head - tail - 1) & (size - 1)) + 1;
-}
-
-static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
- const void *data, size_t size)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
-
- iosys_map_incr(&ring, lrc->ring.tail);
- xe_map_memcpy_to(xe, &ring, 0, data, size);
- lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
-}
-
-void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map ring;
- u32 rhs;
- size_t aligned_size;
-
- xe_assert(xe, IS_ALIGNED(size, 4));
- aligned_size = ALIGN(size, 8);
-
- ring = __xe_lrc_ring_map(lrc);
-
- xe_assert(xe, lrc->ring.tail < lrc->ring.size);
- rhs = lrc->ring.size - lrc->ring.tail;
- if (size > rhs) {
- __xe_lrc_write_ring(lrc, ring, data, rhs);
- __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
- } else {
- __xe_lrc_write_ring(lrc, ring, data, size);
- }
-
- if (aligned_size > size) {
- u32 noop = MI_NOOP;
-
- __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
- }
-}
-
-u64 xe_lrc_descriptor(struct xe_lrc *lrc)
-{
- return lrc->desc | xe_lrc_ggtt_addr(lrc);
-}
-
-u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_seqno_ggtt_addr(lrc);
-}
-
-/**
- * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
- *
- * Allocate but don't initialize an lrc seqno fence.
- *
- * Return: Pointer to the allocated fence or
- * negative error pointer on error.
- */
-struct dma_fence *xe_lrc_alloc_seqno_fence(void)
-{
- return xe_hw_fence_alloc();
-}
-
-/**
- * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
- * @fence: Pointer to the fence to free.
- *
- * Frees an lrc seqno fence that hasn't yet been
- * initialized.
- */
-void xe_lrc_free_seqno_fence(struct dma_fence *fence)
-{
- xe_hw_fence_free(fence);
-}
-
-/**
- * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
- * @lrc: Pointer to the lrc.
- * @fence: Pointer to the fence to initialize.
- *
- * Initializes a pre-allocated lrc seqno fence.
- * After initialization, the fence is subject to normal
- * dma-fence refcounting.
- */
-void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
-{
- xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
-}
-
-s32 xe_lrc_seqno(struct xe_lrc *lrc)
-{
- struct iosys_map map = __xe_lrc_seqno_map(lrc);
-
- return xe_map_read32(lrc_to_xe(lrc), &map);
-}
-
-s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
-{
- struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
-
- return xe_map_read32(lrc_to_xe(lrc), &map);
-}
-
-u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_start_seqno_ggtt_addr(lrc);
-}
-
-u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_parallel_ggtt_addr(lrc);
-}
-
-struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
-{
- return __xe_lrc_parallel_map(lrc);
-}
-
-static int instr_dw(u32 cmd_header)
-{
- /* GFXPIPE "SINGLE_DW" opcodes are a single dword */
- if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
- GFXPIPE_SINGLE_DW_CMD(0, 0))
- return 1;
-
- /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
- if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
- return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;
-
- /* Most instructions have the # of dwords (minus 2) in 7:0 */
- return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
-}
-
-static int dump_mi_command(struct drm_printer *p,
- struct xe_gt *gt,
- u32 *dw,
- int remaining_dw)
-{
- u32 inst_header = *dw;
- u32 numdw = instr_dw(inst_header);
- u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
- int num_noop;
-
- /* First check for commands that don't have/use a '# DW' field */
- switch (inst_header & MI_OPCODE) {
- case MI_NOOP:
- num_noop = 1;
- while (num_noop < remaining_dw &&
- (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
- num_noop++;
- drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
- return num_noop;
-
- case MI_TOPOLOGY_FILTER:
- drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
- return 1;
-
- case MI_BATCH_BUFFER_END:
- drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
- /* Return 'remaining_dw' to consume the rest of the LRC */
- return remaining_dw;
- }
-
- /*
- * Any remaining commands include a # of dwords. We should make sure
- * it doesn't exceed the remaining size of the LRC.
- */
- if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
- numdw = remaining_dw;
-
- switch (inst_header & MI_OPCODE) {
- case MI_LOAD_REGISTER_IMM:
- drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
- inst_header, (numdw - 1) / 2);
- for (int i = 1; i < numdw; i += 2)
- drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
- return numdw;
-
- case MI_LOAD_REGISTER_MEM & MI_OPCODE:
- drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
- inst_header,
- dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
- dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
- if (numdw == 4)
- drm_printf(p, " - %#6x = %#010llx\n",
- dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
- else
- drm_printf(p, " - %*ph (%s)\n",
- (int)sizeof(u32) * (numdw - 1), dw + 1,
- numdw < 4 ? "truncated" : "malformed");
- return numdw;
-
- case MI_FORCE_WAKEUP:
- drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
- return numdw;
-
- default:
- drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
- inst_header, opcode, numdw);
- return numdw;
- }
-}
-
-static int dump_gfxpipe_command(struct drm_printer *p,
- struct xe_gt *gt,
- u32 *dw,
- int remaining_dw)
-{
- u32 numdw = instr_dw(*dw);
- u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
- u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
- u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);
-
- /*
- * Make sure we haven't mis-parsed a number of dwords that exceeds the
- * remaining size of the LRC.
- */
- if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
- numdw = remaining_dw;
-
- switch (*dw & GFXPIPE_MATCH_MASK) {
-#define MATCH(cmd) \
- case cmd: \
- drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
- return numdw
-#define MATCH3D(cmd) \
- case CMD_##cmd: \
- drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
- return numdw
-
- MATCH(STATE_BASE_ADDRESS);
- MATCH(STATE_SIP);
- MATCH(GPGPU_CSR_BASE_ADDRESS);
- MATCH(STATE_COMPUTE_MODE);
- MATCH3D(3DSTATE_BTD);
- MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
- MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);
-
- MATCH3D(3DSTATE_VF_STATISTICS);
-
- MATCH(PIPELINE_SELECT);
-
- MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
- MATCH3D(3DSTATE_CLEAR_PARAMS);
- MATCH3D(3DSTATE_DEPTH_BUFFER);
- MATCH3D(3DSTATE_STENCIL_BUFFER);
- MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
- MATCH3D(3DSTATE_VERTEX_BUFFERS);
- MATCH3D(3DSTATE_VERTEX_ELEMENTS);
- MATCH3D(3DSTATE_INDEX_BUFFER);
- MATCH3D(3DSTATE_VF);
- MATCH3D(3DSTATE_MULTISAMPLE);
- MATCH3D(3DSTATE_CC_STATE_POINTERS);
- MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
- MATCH3D(3DSTATE_VS);
- MATCH3D(3DSTATE_GS);
- MATCH3D(3DSTATE_CLIP);
- MATCH3D(3DSTATE_SF);
- MATCH3D(3DSTATE_WM);
- MATCH3D(3DSTATE_CONSTANT_VS);
- MATCH3D(3DSTATE_CONSTANT_GS);
- MATCH3D(3DSTATE_CONSTANT_PS);
- MATCH3D(3DSTATE_SAMPLE_MASK);
- MATCH3D(3DSTATE_CONSTANT_HS);
- MATCH3D(3DSTATE_CONSTANT_DS);
- MATCH3D(3DSTATE_HS);
- MATCH3D(3DSTATE_TE);
- MATCH3D(3DSTATE_DS);
- MATCH3D(3DSTATE_STREAMOUT);
- MATCH3D(3DSTATE_SBE);
- MATCH3D(3DSTATE_PS);
- MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
- MATCH3D(3DSTATE_CPS_POINTERS);
- MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
- MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
- MATCH3D(3DSTATE_VF_INSTANCING);
- MATCH3D(3DSTATE_VF_SGVS);
- MATCH3D(3DSTATE_VF_TOPOLOGY);
- MATCH3D(3DSTATE_WM_CHROMAKEY);
- MATCH3D(3DSTATE_PS_BLEND);
- MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
- MATCH3D(3DSTATE_PS_EXTRA);
- MATCH3D(3DSTATE_RASTER);
- MATCH3D(3DSTATE_SBE_SWIZ);
- MATCH3D(3DSTATE_WM_HZ_OP);
- MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
- MATCH3D(3DSTATE_VF_SGVS_2);
- MATCH3D(3DSTATE_VFG);
- MATCH3D(3DSTATE_URB_ALLOC_VS);
- MATCH3D(3DSTATE_URB_ALLOC_HS);
- MATCH3D(3DSTATE_URB_ALLOC_DS);
- MATCH3D(3DSTATE_URB_ALLOC_GS);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
- MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
- MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
- MATCH3D(3DSTATE_AMFS);
- MATCH3D(3DSTATE_DEPTH_BOUNDS);
- MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
- MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
- MATCH3D(3DSTATE_MESH_CONTROL);
- MATCH3D(3DSTATE_MESH_DISTRIB);
- MATCH3D(3DSTATE_TASK_REDISTRIB);
- MATCH3D(3DSTATE_MESH_SHADER);
- MATCH3D(3DSTATE_MESH_SHADER_DATA);
- MATCH3D(3DSTATE_TASK_CONTROL);
- MATCH3D(3DSTATE_TASK_SHADER);
- MATCH3D(3DSTATE_TASK_SHADER_DATA);
- MATCH3D(3DSTATE_URB_ALLOC_MESH);
- MATCH3D(3DSTATE_URB_ALLOC_TASK);
- MATCH3D(3DSTATE_CLIP_MESH);
- MATCH3D(3DSTATE_SBE_MESH);
- MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
-
- MATCH3D(3DSTATE_DRAWING_RECTANGLE);
- MATCH3D(3DSTATE_CHROMA_KEY);
- MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
- MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
- MATCH3D(3DSTATE_LINE_STIPPLE);
- MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
- MATCH3D(3DSTATE_MONOFILTER_SIZE);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
- MATCH3D(3DSTATE_SO_DECL_LIST);
- MATCH3D(3DSTATE_SO_BUFFER);
- MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
- MATCH3D(3DSTATE_SAMPLE_PATTERN);
- MATCH3D(3DSTATE_3D_MODE);
- MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
- MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
- MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);
-
- default:
- drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
- *dw, pipeline, opcode, subopcode, numdw);
- return numdw;
- }
-}
-
-static int dump_gfx_state_command(struct drm_printer *p,
- struct xe_gt *gt,
- u32 *dw,
- int remaining_dw)
-{
- u32 numdw = instr_dw(*dw);
- u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);
-
- /*
- * Make sure we haven't mis-parsed a number of dwords that exceeds the
- * remaining size of the LRC.
- */
- if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
- numdw = remaining_dw;
-
- switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
- MATCH(STATE_WRITE_INLINE);
-
- default:
- drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
- *dw, opcode, numdw);
- return numdw;
- }
-}
-
-void xe_lrc_dump_default(struct drm_printer *p,
- struct xe_gt *gt,
- enum xe_engine_class hwe_class)
-{
- u32 *dw;
- int remaining_dw, num_dw;
-
- if (!gt->default_lrc[hwe_class]) {
- drm_printf(p, "No default LRC for class %d\n", hwe_class);
- return;
- }
-
- /*
- * Skip the beginning of the LRC since it contains the per-process
- * hardware status page.
- */
- dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
- remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;
-
- while (remaining_dw > 0) {
- if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
- num_dw = dump_mi_command(p, gt, dw, remaining_dw);
- } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
- num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
- } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
- num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
- } else {
- num_dw = min(instr_dw(*dw), remaining_dw);
- drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
- *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
- num_dw);
- }
-
- dw += num_dw;
- remaining_dw -= num_dw;
- }
-}
-
-struct instr_state {
- u32 instr;
- u16 num_dw;
-};
-
-static const struct instr_state xe_hpg_svg_state[] = {
- { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
- { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
- { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
- { .instr = CMD_3DSTATE_VS, .num_dw = 9 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
- { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
- { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
- { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SF, .num_dw = 4 },
- { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
- { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
- { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
- { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
- { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
- { .instr = CMD_3DSTATE_HS, .num_dw = 9 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
- { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
- { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
- { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
- { .instr = CMD_3DSTATE_TE, .num_dw = 5 },
- { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
- { .instr = CMD_3DSTATE_DS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_GS, .num_dw = 10 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
- { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
- { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
- { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
- { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
-};
-
-void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
-{
- struct xe_gt *gt = q->hwe->gt;
- struct xe_device *xe = gt_to_xe(gt);
- const struct instr_state *state_table = NULL;
- int state_table_size = 0;
-
- /*
- * Wa_14019789679
- *
- * If the driver doesn't explicitly emit the SVG instructions while
- * setting up the default LRC, the context switch will write 0's
- * (noops) into the LRC memory rather than the expected instruction
- * headers. Application contexts start out as a copy of the default
- * LRC, and if they also do not emit specific settings for some SVG
- * state, then on context restore they'll unintentionally inherit
- * whatever state setting the previous context had programmed into the
- * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
- * prevent the hardware from resetting that state back to any specific
- * value).
- *
- * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
- * since that's a specific state setting that can easily cause GPU
- * hangs if unintentionally inherited. However to be safe we'll
- * continue to emit all of the SVG state since it's best not to leak
- * any of the state between contexts, even if that leakage is harmless.
- */
- if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
- state_table = xe_hpg_svg_state;
- state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
- }
-
- if (!state_table) {
- xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
- GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
- return;
- }
-
- for (int i = 0; i < state_table_size; i++) {
- u32 instr = state_table[i].instr;
- u16 num_dw = state_table[i].num_dw;
- bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
-
- xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
- xe_gt_assert(gt, num_dw != 0);
- xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
-
- /*
- * Xe2's SVG context is the same as the one on DG2 / MTL
- * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
- * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
- * Just make the replacement here rather than defining a
- * whole separate table for the single trivial change.
- */
- if (GRAPHICS_VER(xe) >= 20 &&
- instr == CMD_3DSTATE_DRAWING_RECTANGLE)
- instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
-
- bb->cs[bb->len] = instr;
- if (!is_single_dw)
- bb->cs[bb->len] |= (num_dw - 2);
-
- bb->len += num_dw;
- }
-}
-
-struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
-{
- struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);
-
- if (!snapshot)
- return NULL;
-
-<<<<<<<
- if (lrc->bo && lrc->bo->vm)
-=======
- if (lrc->bo->vm)
->>>>>>>
- xe_vm_get(lrc->bo->vm);
-
- snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
- snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
- snapshot->head = xe_lrc_ring_head(lrc);
- snapshot->tail.internal = lrc->ring.tail;
- snapshot->tail.memory = xe_lrc_ring_tail(lrc);
- snapshot->start_seqno = xe_lrc_start_seqno(lrc);
- snapshot->seqno = xe_lrc_seqno(lrc);
- snapshot->lrc_bo = xe_bo_get(lrc->bo);
- snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
- snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
- snapshot->lrc_snapshot = NULL;
- snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
- snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
- return snapshot;
-}
-
-void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
-{
- struct xe_bo *bo;
- struct xe_vm *vm;
- struct iosys_map src;
-
- if (!snapshot)
- return;
-
- bo = snapshot->lrc_bo;
- vm = bo->vm;
- snapshot->lrc_bo = NULL;
-
- snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
- if (!snapshot->lrc_snapshot)
- goto put_bo;
-
- xe_bo_lock(bo, false);
- if (!ttm_bo_vmap(&bo->ttm, &src)) {
- xe_map_memcpy_from(xe_bo_device(bo),
- snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
- snapshot->lrc_size);
- ttm_bo_vunmap(&bo->ttm, &src);
- } else {
- kvfree(snapshot->lrc_snapshot);
- snapshot->lrc_snapshot = NULL;
- }
- xe_bo_unlock(bo);
-put_bo:
- xe_bo_put(bo);
- if (vm)
- xe_vm_put(vm);
-}
-
-void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
-{
- unsigned long i;
-
- if (!snapshot)
- return;
-
- drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
- drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
- snapshot->indirect_context_desc);
- drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
- drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
- snapshot->tail.internal, snapshot->tail.memory);
- drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
- drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
- drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
- drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);
-
- if (!snapshot->lrc_snapshot)
- return;
-
- drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
- drm_puts(p, "\t[HWSP].data: ");
- for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
- u32 *val = snapshot->lrc_snapshot + i;
- char dumped[ASCII85_BUFSZ];
-
- drm_puts(p, ascii85_encode(*val, dumped));
- }
-
- drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
- drm_puts(p, "\t[HWCTX].data: ");
- for (; i < snapshot->lrc_size; i += sizeof(u32)) {
- u32 *val = snapshot->lrc_snapshot + i;
- char dumped[ASCII85_BUFSZ];
-
- drm_puts(p, ascii85_encode(*val, dumped));
- }
- drm_puts(p, "\n");
-}
-
-void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
-{
- if (!snapshot)
- return;
-
- kvfree(snapshot->lrc_snapshot);
- if (snapshot->lrc_bo) {
- struct xe_vm *vm;
-
- vm = snapshot->lrc_bo->vm;
- xe_bo_put(snapshot->lrc_bo);
- if (vm)
- xe_vm_put(vm);
- }
- kfree(snapshot);
-}
-
-/**
- * xe_lrc_update_timestamp() - Update ctx timestamp
- * @lrc: Pointer to the lrc.
- * @old_ts: Old timestamp value
- *
- * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and
- * update saved value.
- *
- * Returns: New ctx timestamp value
- */
-u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
-{
- *old_ts = lrc->ctx_timestamp;
-
- lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
-
- return lrc->ctx_timestamp;
-}
diff --git a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.1 b/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.1
deleted file mode 100644
index bee934c9371f..000000000000
--- a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.1
+++ /dev/null
@@ -1,1784 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#include "xe_lrc.h"
-
-#include <generated/xe_wa_oob.h>
-
-#include <linux/ascii85.h>
-
-#include "instructions/xe_mi_commands.h"
-#include "instructions/xe_gfxpipe_commands.h"
-#include "instructions/xe_gfx_state_commands.h"
-#include "regs/xe_engine_regs.h"
-#include "regs/xe_lrc_layout.h"
-#include "xe_bb.h"
-#include "xe_bo.h"
-#include "xe_device.h"
-#include "xe_drm_client.h"
-#include "xe_exec_queue_types.h"
-#include "xe_gt.h"
-#include "xe_gt_printk.h"
-#include "xe_hw_fence.h"
-#include "xe_map.h"
-#include "xe_memirq.h"
-#include "xe_sriov.h"
-#include "xe_vm.h"
-#include "xe_wa.h"
-
-#define LRC_VALID BIT_ULL(0)
-#define LRC_PRIVILEGE BIT_ULL(8)
-#define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3)
-#define LRC_LEGACY_64B_CONTEXT 3
-
-#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61)
-#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
-
-#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
-
-struct xe_lrc_snapshot {
- struct xe_bo *lrc_bo;
- void *lrc_snapshot;
- unsigned long lrc_size, lrc_offset;
-
- u32 context_desc;
- u32 indirect_context_desc;
- u32 head;
- struct {
- u32 internal;
- u32 memory;
- } tail;
- u32 start_seqno;
- u32 seqno;
- u32 ctx_timestamp;
- u32 ctx_job_timestamp;
-};
-
-static struct xe_device *
-lrc_to_xe(struct xe_lrc *lrc)
-{
- return gt_to_xe(lrc->fence_ctx.gt);
-}
-
-size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
-{
- struct xe_device *xe = gt_to_xe(gt);
- size_t size;
-
- switch (class) {
- case XE_ENGINE_CLASS_RENDER:
- if (GRAPHICS_VER(xe) >= 20)
- size = 4 * SZ_4K;
- else
- size = 14 * SZ_4K;
- break;
- case XE_ENGINE_CLASS_COMPUTE:
- /* 14 pages since graphics_ver == 11 */
- if (GRAPHICS_VER(xe) >= 20)
- size = 3 * SZ_4K;
- else
- size = 14 * SZ_4K;
- break;
- default:
- WARN(1, "Unknown engine class: %d", class);
- fallthrough;
- case XE_ENGINE_CLASS_COPY:
- case XE_ENGINE_CLASS_VIDEO_DECODE:
- case XE_ENGINE_CLASS_VIDEO_ENHANCE:
- case XE_ENGINE_CLASS_OTHER:
- size = 2 * SZ_4K;
- }
-
- /* Add indirect ring state page */
- if (xe_gt_has_indirect_ring_state(gt))
- size += LRC_INDIRECT_RING_STATE_SIZE;
-
- return size;
-}
-
-/*
- * The per-platform tables are u8-encoded in @data. Decode @data and set the
- * addresses' offset and commands in @regs. The following encoding is used
- * for each byte. There are 2 steps: decoding commands and decoding addresses.
- *
- * Commands:
- * [7]: create NOPs - number of NOPs are set in lower bits
- * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
- * MI_LRI_FORCE_POSTED
- * [5:0]: Number of NOPs or registers to set values to in case of
- * MI_LOAD_REGISTER_IMM
- *
- * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
- * number of registers. They are set by using the REG/REG16 macros: the former
- * is used for offsets smaller than 0x200 while the latter is for values bigger
- * than that. Those macros already set all the bits documented below correctly:
- *
- * [7]: When a register offset needs more than 6 bits, use additional bytes, to
- * follow, for the lower bits
- * [6:0]: Register offset, without considering the engine base.
- *
- * This function only tweaks the commands and register offsets. Values are not
- * filled out.
- */
-static void set_offsets(u32 *regs,
- const u8 *data,
- const struct xe_hw_engine *hwe)
-#define NOP(x) (BIT(7) | (x))
-#define LRI(count, flags) ((flags) << 6 | (count) | \
- BUILD_BUG_ON_ZERO(count >= BIT(6)))
-#define POSTED BIT(0)
-#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
-#define REG16(x) \
- (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
- (((x) >> 2) & 0x7f)
-{
- const u32 base = hwe->mmio_base;
-
- while (*data) {
- u8 count, flags;
-
- if (*data & BIT(7)) { /* skip */
- count = *data++ & ~BIT(7);
- regs += count;
- continue;
- }
-
- count = *data & 0x3f;
- flags = *data >> 6;
- data++;
-
- *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
- if (flags & POSTED)
- *regs |= MI_LRI_FORCE_POSTED;
- *regs |= MI_LRI_LRM_CS_MMIO;
- regs++;
-
- xe_gt_assert(hwe->gt, count);
- do {
- u32 offset = 0;
- u8 v;
-
- do {
- v = *data++;
- offset <<= 7;
- offset |= v & ~BIT(7);
- } while (v & BIT(7));
-
- regs[0] = base + (offset << 2);
- regs += 2;
- } while (--count);
- }
-
- *regs = MI_BATCH_BUFFER_END | BIT(0);
-}
-
-static const u8 gen12_xcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- 0
-};
-
-static const u8 dg2_xcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- REG(0x120),
- REG(0x124),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- 0
-};
-
-static const u8 gen12_rcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
- NOP(3 + 9 + 1),
-
- LRI(51, POSTED),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG(0x028),
- REG(0x09c),
- REG(0x0c0),
- REG(0x178),
- REG(0x17c),
- REG16(0x358),
- REG(0x170),
- REG(0x150),
- REG(0x154),
- REG(0x158),
- REG16(0x41c),
- REG16(0x600),
- REG16(0x604),
- REG16(0x608),
- REG16(0x60c),
- REG16(0x610),
- REG16(0x614),
- REG16(0x618),
- REG16(0x61c),
- REG16(0x620),
- REG16(0x624),
- REG16(0x628),
- REG16(0x62c),
- REG16(0x630),
- REG16(0x634),
- REG16(0x638),
- REG16(0x63c),
- REG16(0x640),
- REG16(0x644),
- REG16(0x648),
- REG16(0x64c),
- REG16(0x650),
- REG16(0x654),
- REG16(0x658),
- REG16(0x65c),
- REG16(0x660),
- REG16(0x664),
- REG16(0x668),
- REG16(0x66c),
- REG16(0x670),
- REG16(0x674),
- REG16(0x678),
- REG16(0x67c),
- REG(0x068),
- REG(0x084),
- NOP(1),
-
- 0
-};
-
-static const u8 xehp_rcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- 0
-};
-
-static const u8 dg2_rcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- REG(0x120),
- REG(0x124),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- 0
-};
-
-static const u8 mtl_rcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- REG(0x120),
- REG(0x124),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- NOP(2),
- LRI(2, POSTED),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- 0
-};
-
-#define XE2_CTX_COMMON \
- NOP(1), /* [0x00] */ \
- LRI(15, POSTED), /* [0x01] */ \
- REG16(0x244), /* [0x02] CTXT_SR_CTL */ \
- REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \
- REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \
- REG(0x038), /* [0x08] RING_BUFFER_START */ \
- REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \
- REG(0x168), /* [0x0c] BB_ADDR_UDW */ \
- REG(0x140), /* [0x0e] BB_ADDR */ \
- REG(0x110), /* [0x10] BB_STATE */ \
- REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \
- REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \
- REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
- REG(0x180), /* [0x18] CCID */ \
- REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \
- REG(0x120), /* [0x1c] PRT_BB_STATE */ \
- REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \
- \
- NOP(1), /* [0x20] */ \
- LRI(9, POSTED), /* [0x21] */ \
- REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \
- REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \
- REG(0x108), /* [0x26] INDIRECT_RING_STATE */ \
- REG16(0x284), /* [0x28] dummy reg */ \
- REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \
- REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \
- REG16(0x278), /* [0x2e] CS_CTX_ASID */ \
- REG16(0x274), /* [0x30] PTBP_UDW */ \
- REG16(0x270) /* [0x32] PTBP_LDW */
-
-static const u8 xe2_rcs_offsets[] = {
- XE2_CTX_COMMON,
-
- NOP(2), /* [0x34] */
- LRI(2, POSTED), /* [0x36] */
- REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
- REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */
-
- NOP(6), /* [0x41] */
- LRI(1, 0), /* [0x47] */
- REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */
-
- 0
-};
-
-static const u8 xe2_bcs_offsets[] = {
- XE2_CTX_COMMON,
-
- NOP(4 + 8 + 1), /* [0x34] */
- LRI(2, POSTED), /* [0x41] */
- REG16(0x200), /* [0x42] BCS_SWCTRL */
- REG16(0x204), /* [0x44] BLIT_CCTL */
-
- 0
-};
-
-static const u8 xe2_xcs_offsets[] = {
- XE2_CTX_COMMON,
-
- 0
-};
-
-static const u8 xe2_indirect_ring_state_offsets[] = {
- NOP(1), /* [0x00] */
- LRI(5, POSTED), /* [0x01] */
- REG(0x034), /* [0x02] RING_BUFFER_HEAD */
- REG(0x030), /* [0x04] RING_BUFFER_TAIL */
- REG(0x038), /* [0x06] RING_BUFFER_START */
- REG(0x048), /* [0x08] RING_BUFFER_START_UDW */
- REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */
-
- NOP(5), /* [0x0c] */
- LRI(9, POSTED), /* [0x11] */
- REG(0x168), /* [0x12] BB_ADDR_UDW */
- REG(0x140), /* [0x14] BB_ADDR */
- REG(0x110), /* [0x16] BB_STATE */
- REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */
-
- NOP(12), /* [0x00] */
-
- 0
-};
-
-#undef REG16
-#undef REG
-#undef LRI
-#undef NOP
-
-static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
-{
- if (class == XE_ENGINE_CLASS_RENDER) {
- if (GRAPHICS_VER(xe) >= 20)
- return xe2_rcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1270)
- return mtl_rcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1255)
- return dg2_rcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1250)
- return xehp_rcs_offsets;
- else
- return gen12_rcs_offsets;
- } else if (class == XE_ENGINE_CLASS_COPY) {
- if (GRAPHICS_VER(xe) >= 20)
- return xe2_bcs_offsets;
- else
- return gen12_xcs_offsets;
- } else {
- if (GRAPHICS_VER(xe) >= 20)
- return xe2_xcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1255)
- return dg2_xcs_offsets;
- else
- return gen12_xcs_offsets;
- }
-}
-
-static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
-{
- regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
- CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
-
- if (xe_gt_has_indirect_ring_state(hwe->gt))
- regs[CTX_CONTEXT_CONTROL] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);
-
- /* TODO: Timestamp */
-}
-
-static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
-{
- struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
- struct xe_device *xe = gt_to_xe(hwe->gt);
-
- if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
- return;
-
- regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
- MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
- regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
- regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
-
- regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
- MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
- regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
- regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
- regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
- regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
-}
-
-static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
-{
- struct xe_device *xe = gt_to_xe(hwe->gt);
-
- if (GRAPHICS_VERx100(xe) >= 1250)
- return 0x70;
- else
- return 0x60;
-}
-
-static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
-{
- int x;
-
- x = lrc_ring_mi_mode(hwe);
- regs[x + 1] &= ~STOP_RING;
- regs[x + 1] |= STOP_RING << 16;
-}
-
-static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
-{
- return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
-}
-
-static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
-{
- return 0;
-}
-
-u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
-{
- return lrc->ring.size;
-}
-
-/* Make the magic macros work */
-#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
-#define __xe_lrc_regs_offset xe_lrc_regs_offset
-
-#define LRC_SEQNO_PPHWSP_OFFSET 512
-#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
-#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
-#define LRC_PARALLEL_PPHWSP_OFFSET 2048
-#define LRC_PPHWSP_SIZE SZ_4K
-
-u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
-{
- return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
-}
-
-static size_t lrc_reg_size(struct xe_device *xe)
-{
- if (GRAPHICS_VERx100(xe) >= 1250)
- return 96 * sizeof(u32);
- else
- return 80 * sizeof(u32);
-}
-
-size_t xe_lrc_skip_size(struct xe_device *xe)
-{
- return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
-}
-
-static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
-{
- /* The seqno is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
-}
-
-static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
-{
- /* The start seqno is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
-}
-
-static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
-{
- /* The start seqno is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
-}
-
-static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
-{
- /* The parallel is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
-}
-
-static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
-{
- return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
-}
-
-static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
-{
- /* Indirect ring state page is at the very end of LRC */
- return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
-}
-
-#define DECL_MAP_ADDR_HELPERS(elem) \
-static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
-{ \
- struct iosys_map map = lrc->bo->vmap; \
-\
- xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
- iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
- return map; \
-} \
-static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
-{ \
- return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
-} \
-
-DECL_MAP_ADDR_HELPERS(ring)
-DECL_MAP_ADDR_HELPERS(pphwsp)
-DECL_MAP_ADDR_HELPERS(seqno)
-DECL_MAP_ADDR_HELPERS(regs)
-DECL_MAP_ADDR_HELPERS(start_seqno)
-DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
-DECL_MAP_ADDR_HELPERS(ctx_timestamp)
-DECL_MAP_ADDR_HELPERS(parallel)
-DECL_MAP_ADDR_HELPERS(indirect_ring)
-
-#undef DECL_MAP_ADDR_HELPERS
-
-/**
- * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp GGTT address
- */
-u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
-}
-
-/**
- * xe_lrc_ctx_timestamp() - Read ctx timestamp value
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp value
- */
-u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_ctx_timestamp_map(lrc);
- return xe_map_read32(xe, &map);
-}
-
-/**
- * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp job GGTT address
- */
-u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
-}
-
-/**
- * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp job value
- */
-u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_ctx_job_timestamp_map(lrc);
- return xe_map_read32(xe, &map);
-}
-
-u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_pphwsp_ggtt_addr(lrc);
-}
-
-u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
-{
- if (!xe_lrc_has_indirect_ring_state(lrc))
- return 0;
-
- return __xe_lrc_indirect_ring_ggtt_addr(lrc);
-}
-
-static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_indirect_ring_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- return xe_map_read32(xe, &map);
-}
-
-static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
- int reg_nr, u32 val)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_indirect_ring_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- xe_map_write32(xe, &map, val);
-}
-
-u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_regs_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- return xe_map_read32(xe, &map);
-}
-
-void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_regs_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- xe_map_write32(xe, &map, val);
-}
-
-static void *empty_lrc_data(struct xe_hw_engine *hwe)
-{
- struct xe_gt *gt = hwe->gt;
- void *data;
- u32 *regs;
-
- data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
- if (!data)
- return NULL;
-
- /* 1st page: Per-Process of HW status Page */
- regs = data + LRC_PPHWSP_SIZE;
- set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
- set_context_control(regs, hwe);
- set_memory_based_intr(regs, hwe);
- reset_stop_ring(regs, hwe);
- if (xe_gt_has_indirect_ring_state(gt)) {
- regs = data + xe_gt_lrc_size(gt, hwe->class) -
- LRC_INDIRECT_RING_STATE_SIZE;
- set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
- }
-
- return data;
-}
-
-static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
-{
- u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
-
- xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
- xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
-}
-
-static void xe_lrc_finish(struct xe_lrc *lrc)
-{
- xe_hw_fence_ctx_finish(&lrc->fence_ctx);
- xe_bo_lock(lrc->bo, false);
- xe_bo_unpin(lrc->bo);
- xe_bo_unlock(lrc->bo);
- xe_bo_put(lrc->bo);
-}
-
-#define PVC_CTX_ASID (0x2e + 1)
-#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
-
-static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
- struct xe_vm *vm, u32 ring_size)
-{
- struct xe_gt *gt = hwe->gt;
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_device *xe = gt_to_xe(gt);
- struct iosys_map map;
- void *init_data = NULL;
- u32 arb_enable;
- u32 lrc_size;
- int err;
-
- kref_init(&lrc->refcount);
- lrc->flags = 0;
- lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
- if (xe_gt_has_indirect_ring_state(gt))
- lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
-
- /*
- * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
- * via VM bind calls.
- */
- lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
- if (IS_ERR(lrc->bo))
- return PTR_ERR(lrc->bo);
-
- lrc->size = lrc_size;
- lrc->tile = gt_to_tile(hwe->gt);
- lrc->ring.size = ring_size;
- lrc->ring.tail = 0;
- lrc->ctx_timestamp = 0;
-
- xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
- hwe->fence_irq, hwe->name);
-
- if (!gt->default_lrc[hwe->class]) {
- init_data = empty_lrc_data(hwe);
- if (!init_data) {
- err = -ENOMEM;
- goto err_lrc_finish;
- }
- }
-
- /*
- * Init Per-Process of HW status Page, LRC / context state to known
- * values
- */
- map = __xe_lrc_pphwsp_map(lrc);
- if (!init_data) {
- xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
- xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
- gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
- xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
- } else {
- xe_map_memcpy_to(xe, &map, 0, init_data,
- xe_gt_lrc_size(gt, hwe->class));
- kfree(init_data);
- }
-
- if (vm) {
- xe_lrc_set_ppgtt(lrc, vm);
-
- if (vm->xef)
- xe_drm_client_add_bo(vm->xef->client, lrc->bo);
- }
-
- if (xe_gt_has_indirect_ring_state(gt)) {
- xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
- __xe_lrc_indirect_ring_ggtt_addr(lrc));
-
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
- __xe_lrc_ring_ggtt_addr(lrc));
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
- RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
- } else {
- xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
- xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
- xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
- xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
- RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
- }
-
- xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
-
- if (xe->info.has_asid && vm)
- xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
-
- lrc->desc = LRC_VALID;
- lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
- /* TODO: Priority */
-
- /* While this appears to have something about privileged batches or
- * some such, it really just means PPGTT mode.
- */
- if (vm)
- lrc->desc |= LRC_PRIVILEGE;
-
- if (GRAPHICS_VERx100(xe) < 1250) {
- lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
- lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
- }
-
- arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
-
- map = __xe_lrc_seqno_map(lrc);
- xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
-
- map = __xe_lrc_start_seqno_map(lrc);
- xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
-
- return 0;
-
-err_lrc_finish:
- xe_lrc_finish(lrc);
- return err;
-}
-
-/**
- * xe_lrc_create - Create a LRC
- * @hwe: Hardware Engine
- * @vm: The VM (address space)
- * @ring_size: LRC ring size
- *
- * Allocate and initialize the Logical Ring Context (LRC).
- *
- * Return pointer to created LRC upon success and an error pointer
- * upon failure.
- */
-struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
- u32 ring_size)
-{
- struct xe_lrc *lrc;
- int err;
-
- lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
- if (!lrc)
- return ERR_PTR(-ENOMEM);
-
- err = xe_lrc_init(lrc, hwe, vm, ring_size);
- if (err) {
- kfree(lrc);
- return ERR_PTR(err);
- }
-
- return lrc;
-}
-
-/**
- * xe_lrc_destroy - Destroy the LRC
- * @ref: reference to LRC
- *
- * Called when ref == 0, release resources held by the Logical Ring Context
- * (LRC) and free the LRC memory.
- */
-void xe_lrc_destroy(struct kref *ref)
-{
- struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);
-
- xe_lrc_finish(lrc);
- kfree(lrc);
-}
-
-void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
- else
- xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
-}
-
-u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
- else
- return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
-}
-
-void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
- else
- xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
-}
-
-u32 xe_lrc_ring_head(struct xe_lrc *lrc)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
- else
- return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
-}
-
-u32 xe_lrc_ring_space(struct xe_lrc *lrc)
-{
- const u32 head = xe_lrc_ring_head(lrc);
- const u32 tail = lrc->ring.tail;
- const u32 size = lrc->ring.size;
-
- return ((head - tail - 1) & (size - 1)) + 1;
-}
-
-static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
- const void *data, size_t size)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
-
- iosys_map_incr(&ring, lrc->ring.tail);
- xe_map_memcpy_to(xe, &ring, 0, data, size);
- lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
-}
-
-void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map ring;
- u32 rhs;
- size_t aligned_size;
-
- xe_assert(xe, IS_ALIGNED(size, 4));
- aligned_size = ALIGN(size, 8);
-
- ring = __xe_lrc_ring_map(lrc);
-
- xe_assert(xe, lrc->ring.tail < lrc->ring.size);
- rhs = lrc->ring.size - lrc->ring.tail;
- if (size > rhs) {
- __xe_lrc_write_ring(lrc, ring, data, rhs);
- __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
- } else {
- __xe_lrc_write_ring(lrc, ring, data, size);
- }
-
- if (aligned_size > size) {
- u32 noop = MI_NOOP;
-
- __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
- }
-}
-
-u64 xe_lrc_descriptor(struct xe_lrc *lrc)
-{
- return lrc->desc | xe_lrc_ggtt_addr(lrc);
-}
-
-u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_seqno_ggtt_addr(lrc);
-}
-
-/**
- * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
- *
- * Allocate but don't initialize an lrc seqno fence.
- *
- * Return: Pointer to the allocated fence or
- * negative error pointer on error.
- */
-struct dma_fence *xe_lrc_alloc_seqno_fence(void)
-{
- return xe_hw_fence_alloc();
-}
-
-/**
- * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
- * @fence: Pointer to the fence to free.
- *
- * Frees an lrc seqno fence that hasn't yet been
- * initialized.
- */
-void xe_lrc_free_seqno_fence(struct dma_fence *fence)
-{
- xe_hw_fence_free(fence);
-}
-
-/**
- * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
- * @lrc: Pointer to the lrc.
- * @fence: Pointer to the fence to initialize.
- *
- * Initializes a pre-allocated lrc seqno fence.
- * After initialization, the fence is subject to normal
- * dma-fence refcounting.
- */
-void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
-{
- xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
-}
-
-s32 xe_lrc_seqno(struct xe_lrc *lrc)
-{
- struct iosys_map map = __xe_lrc_seqno_map(lrc);
-
- return xe_map_read32(lrc_to_xe(lrc), &map);
-}
-
-s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
-{
- struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
-
- return xe_map_read32(lrc_to_xe(lrc), &map);
-}
-
-u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_start_seqno_ggtt_addr(lrc);
-}
-
-u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_parallel_ggtt_addr(lrc);
-}
-
-struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
-{
- return __xe_lrc_parallel_map(lrc);
-}
-
-static int instr_dw(u32 cmd_header)
-{
- /* GFXPIPE "SINGLE_DW" opcodes are a single dword */
- if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
- GFXPIPE_SINGLE_DW_CMD(0, 0))
- return 1;
-
- /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
- if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
- return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;
-
- /* Most instructions have the # of dwords (minus 2) in 7:0 */
- return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
-}
-
-static int dump_mi_command(struct drm_printer *p,
- struct xe_gt *gt,
- u32 *dw,
- int remaining_dw)
-{
- u32 inst_header = *dw;
- u32 numdw = instr_dw(inst_header);
- u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
- int num_noop;
-
- /* First check for commands that don't have/use a '# DW' field */
- switch (inst_header & MI_OPCODE) {
- case MI_NOOP:
- num_noop = 1;
- while (num_noop < remaining_dw &&
- (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
- num_noop++;
- drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
- return num_noop;
-
- case MI_TOPOLOGY_FILTER:
- drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
- return 1;
-
- case MI_BATCH_BUFFER_END:
- drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
- /* Return 'remaining_dw' to consume the rest of the LRC */
- return remaining_dw;
- }
-
- /*
- * Any remaining commands include a # of dwords. We should make sure
- * it doesn't exceed the remaining size of the LRC.
- */
- if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
- numdw = remaining_dw;
-
- switch (inst_header & MI_OPCODE) {
- case MI_LOAD_REGISTER_IMM:
- drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
- inst_header, (numdw - 1) / 2);
- for (int i = 1; i < numdw; i += 2)
- drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
- return numdw;
-
- case MI_LOAD_REGISTER_MEM & MI_OPCODE:
- drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
- inst_header,
- dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
- dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
- if (numdw == 4)
- drm_printf(p, " - %#6x = %#010llx\n",
- dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
- else
- drm_printf(p, " - %*ph (%s)\n",
- (int)sizeof(u32) * (numdw - 1), dw + 1,
- numdw < 4 ? "truncated" : "malformed");
- return numdw;
-
- case MI_FORCE_WAKEUP:
- drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
- return numdw;
-
- default:
- drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
- inst_header, opcode, numdw);
- return numdw;
- }
-}
-
-static int dump_gfxpipe_command(struct drm_printer *p,
- struct xe_gt *gt,
- u32 *dw,
- int remaining_dw)
-{
- u32 numdw = instr_dw(*dw);
- u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
- u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
- u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);
-
- /*
- * Make sure we haven't mis-parsed a number of dwords that exceeds the
- * remaining size of the LRC.
- */
- if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
- numdw = remaining_dw;
-
- switch (*dw & GFXPIPE_MATCH_MASK) {
-#define MATCH(cmd) \
- case cmd: \
- drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
- return numdw
-#define MATCH3D(cmd) \
- case CMD_##cmd: \
- drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
- return numdw
-
- MATCH(STATE_BASE_ADDRESS);
- MATCH(STATE_SIP);
- MATCH(GPGPU_CSR_BASE_ADDRESS);
- MATCH(STATE_COMPUTE_MODE);
- MATCH3D(3DSTATE_BTD);
- MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
- MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);
-
- MATCH3D(3DSTATE_VF_STATISTICS);
-
- MATCH(PIPELINE_SELECT);
-
- MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
- MATCH3D(3DSTATE_CLEAR_PARAMS);
- MATCH3D(3DSTATE_DEPTH_BUFFER);
- MATCH3D(3DSTATE_STENCIL_BUFFER);
- MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
- MATCH3D(3DSTATE_VERTEX_BUFFERS);
- MATCH3D(3DSTATE_VERTEX_ELEMENTS);
- MATCH3D(3DSTATE_INDEX_BUFFER);
- MATCH3D(3DSTATE_VF);
- MATCH3D(3DSTATE_MULTISAMPLE);
- MATCH3D(3DSTATE_CC_STATE_POINTERS);
- MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
- MATCH3D(3DSTATE_VS);
- MATCH3D(3DSTATE_GS);
- MATCH3D(3DSTATE_CLIP);
- MATCH3D(3DSTATE_SF);
- MATCH3D(3DSTATE_WM);
- MATCH3D(3DSTATE_CONSTANT_VS);
- MATCH3D(3DSTATE_CONSTANT_GS);
- MATCH3D(3DSTATE_CONSTANT_PS);
- MATCH3D(3DSTATE_SAMPLE_MASK);
- MATCH3D(3DSTATE_CONSTANT_HS);
- MATCH3D(3DSTATE_CONSTANT_DS);
- MATCH3D(3DSTATE_HS);
- MATCH3D(3DSTATE_TE);
- MATCH3D(3DSTATE_DS);
- MATCH3D(3DSTATE_STREAMOUT);
- MATCH3D(3DSTATE_SBE);
- MATCH3D(3DSTATE_PS);
- MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
- MATCH3D(3DSTATE_CPS_POINTERS);
- MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
- MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
- MATCH3D(3DSTATE_VF_INSTANCING);
- MATCH3D(3DSTATE_VF_SGVS);
- MATCH3D(3DSTATE_VF_TOPOLOGY);
- MATCH3D(3DSTATE_WM_CHROMAKEY);
- MATCH3D(3DSTATE_PS_BLEND);
- MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
- MATCH3D(3DSTATE_PS_EXTRA);
- MATCH3D(3DSTATE_RASTER);
- MATCH3D(3DSTATE_SBE_SWIZ);
- MATCH3D(3DSTATE_WM_HZ_OP);
- MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
- MATCH3D(3DSTATE_VF_SGVS_2);
- MATCH3D(3DSTATE_VFG);
- MATCH3D(3DSTATE_URB_ALLOC_VS);
- MATCH3D(3DSTATE_URB_ALLOC_HS);
- MATCH3D(3DSTATE_URB_ALLOC_DS);
- MATCH3D(3DSTATE_URB_ALLOC_GS);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
- MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
- MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
- MATCH3D(3DSTATE_AMFS);
- MATCH3D(3DSTATE_DEPTH_BOUNDS);
- MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
- MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
- MATCH3D(3DSTATE_MESH_CONTROL);
- MATCH3D(3DSTATE_MESH_DISTRIB);
- MATCH3D(3DSTATE_TASK_REDISTRIB);
- MATCH3D(3DSTATE_MESH_SHADER);
- MATCH3D(3DSTATE_MESH_SHADER_DATA);
- MATCH3D(3DSTATE_TASK_CONTROL);
- MATCH3D(3DSTATE_TASK_SHADER);
- MATCH3D(3DSTATE_TASK_SHADER_DATA);
- MATCH3D(3DSTATE_URB_ALLOC_MESH);
- MATCH3D(3DSTATE_URB_ALLOC_TASK);
- MATCH3D(3DSTATE_CLIP_MESH);
- MATCH3D(3DSTATE_SBE_MESH);
- MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
-
- MATCH3D(3DSTATE_DRAWING_RECTANGLE);
- MATCH3D(3DSTATE_CHROMA_KEY);
- MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
- MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
- MATCH3D(3DSTATE_LINE_STIPPLE);
- MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
- MATCH3D(3DSTATE_MONOFILTER_SIZE);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
- MATCH3D(3DSTATE_SO_DECL_LIST);
- MATCH3D(3DSTATE_SO_BUFFER);
- MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
- MATCH3D(3DSTATE_SAMPLE_PATTERN);
- MATCH3D(3DSTATE_3D_MODE);
- MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
- MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
- MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);
-
- default:
- drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
- *dw, pipeline, opcode, subopcode, numdw);
- return numdw;
- }
-}
-
-static int dump_gfx_state_command(struct drm_printer *p,
- struct xe_gt *gt,
- u32 *dw,
- int remaining_dw)
-{
- u32 numdw = instr_dw(*dw);
- u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);
-
- /*
- * Make sure we haven't mis-parsed a number of dwords that exceeds the
- * remaining size of the LRC.
- */
- if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
- numdw = remaining_dw;
-
- switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
- MATCH(STATE_WRITE_INLINE);
-
- default:
- drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
- *dw, opcode, numdw);
- return numdw;
- }
-}
-
-void xe_lrc_dump_default(struct drm_printer *p,
- struct xe_gt *gt,
- enum xe_engine_class hwe_class)
-{
- u32 *dw;
- int remaining_dw, num_dw;
-
- if (!gt->default_lrc[hwe_class]) {
- drm_printf(p, "No default LRC for class %d\n", hwe_class);
- return;
- }
-
- /*
- * Skip the beginning of the LRC since it contains the per-process
- * hardware status page.
- */
- dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
- remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;
-
- while (remaining_dw > 0) {
- if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
- num_dw = dump_mi_command(p, gt, dw, remaining_dw);
- } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
- num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
- } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
- num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
- } else {
- num_dw = min(instr_dw(*dw), remaining_dw);
- drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
- *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
- num_dw);
- }
-
- dw += num_dw;
- remaining_dw -= num_dw;
- }
-}
-
-struct instr_state {
- u32 instr;
- u16 num_dw;
-};
-
-static const struct instr_state xe_hpg_svg_state[] = {
- { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
- { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
- { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
- { .instr = CMD_3DSTATE_VS, .num_dw = 9 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
- { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
- { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
- { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SF, .num_dw = 4 },
- { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
- { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
- { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
- { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
- { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
- { .instr = CMD_3DSTATE_HS, .num_dw = 9 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
- { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
- { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
- { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
- { .instr = CMD_3DSTATE_TE, .num_dw = 5 },
- { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
- { .instr = CMD_3DSTATE_DS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_GS, .num_dw = 10 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
- { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
- { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
- { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
- { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
-};
-
-void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
-{
- struct xe_gt *gt = q->hwe->gt;
- struct xe_device *xe = gt_to_xe(gt);
- const struct instr_state *state_table = NULL;
- int state_table_size = 0;
-
- /*
- * Wa_14019789679
- *
- * If the driver doesn't explicitly emit the SVG instructions while
- * setting up the default LRC, the context switch will write 0's
- * (noops) into the LRC memory rather than the expected instruction
- * headers. Application contexts start out as a copy of the default
- * LRC, and if they also do not emit specific settings for some SVG
- * state, then on context restore they'll unintentionally inherit
- * whatever state setting the previous context had programmed into the
- * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
- * prevent the hardware from resetting that state back to any specific
- * value).
- *
- * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
- * since that's a specific state setting that can easily cause GPU
- * hangs if unintentionally inherited. However to be safe we'll
- * continue to emit all of the SVG state since it's best not to leak
- * any of the state between contexts, even if that leakage is harmless.
- */
- if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
- state_table = xe_hpg_svg_state;
- state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
- }
-
- if (!state_table) {
- xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
- GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
- return;
- }
-
- for (int i = 0; i < state_table_size; i++) {
- u32 instr = state_table[i].instr;
- u16 num_dw = state_table[i].num_dw;
- bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
-
- xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
- xe_gt_assert(gt, num_dw != 0);
- xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
-
- /*
- * Xe2's SVG context is the same as the one on DG2 / MTL
- * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
- * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
- * Just make the replacement here rather than defining a
- * whole separate table for the single trivial change.
- */
- if (GRAPHICS_VER(xe) >= 20 &&
- instr == CMD_3DSTATE_DRAWING_RECTANGLE)
- instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
-
- bb->cs[bb->len] = instr;
- if (!is_single_dw)
- bb->cs[bb->len] |= (num_dw - 2);
-
- bb->len += num_dw;
- }
-}
-
-struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
-{
- struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);
-
- if (!snapshot)
- return NULL;
-
-<<<<<<<
- if (lrc->bo && lrc->bo->vm)
-=======
- if (lrc->bo->vm)
->>>>>>>
- xe_vm_get(lrc->bo->vm);
-
- snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
- snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
- snapshot->head = xe_lrc_ring_head(lrc);
- snapshot->tail.internal = lrc->ring.tail;
- snapshot->tail.memory = xe_lrc_ring_tail(lrc);
- snapshot->start_seqno = xe_lrc_start_seqno(lrc);
- snapshot->seqno = xe_lrc_seqno(lrc);
- snapshot->lrc_bo = xe_bo_get(lrc->bo);
- snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
- snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
- snapshot->lrc_snapshot = NULL;
- snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
- snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
- return snapshot;
-}
-
-void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
-{
- struct xe_bo *bo;
- struct xe_vm *vm;
- struct iosys_map src;
-
- if (!snapshot)
- return;
-
- bo = snapshot->lrc_bo;
- vm = bo->vm;
- snapshot->lrc_bo = NULL;
-
- snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
- if (!snapshot->lrc_snapshot)
- goto put_bo;
-
- xe_bo_lock(bo, false);
- if (!ttm_bo_vmap(&bo->ttm, &src)) {
- xe_map_memcpy_from(xe_bo_device(bo),
- snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
- snapshot->lrc_size);
- ttm_bo_vunmap(&bo->ttm, &src);
- } else {
- kvfree(snapshot->lrc_snapshot);
- snapshot->lrc_snapshot = NULL;
- }
- xe_bo_unlock(bo);
-put_bo:
- xe_bo_put(bo);
- if (vm)
- xe_vm_put(vm);
-}
-
-void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
-{
- unsigned long i;
-
- if (!snapshot)
- return;
-
- drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
- drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
- snapshot->indirect_context_desc);
- drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
- drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
- snapshot->tail.internal, snapshot->tail.memory);
- drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
- drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
- drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
- drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);
-
- if (!snapshot->lrc_snapshot)
- return;
-
- drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
- drm_puts(p, "\t[HWSP].data: ");
- for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
- u32 *val = snapshot->lrc_snapshot + i;
- char dumped[ASCII85_BUFSZ];
-
- drm_puts(p, ascii85_encode(*val, dumped));
- }
-
- drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
- drm_puts(p, "\t[HWCTX].data: ");
- for (; i < snapshot->lrc_size; i += sizeof(u32)) {
- u32 *val = snapshot->lrc_snapshot + i;
- char dumped[ASCII85_BUFSZ];
-
- drm_puts(p, ascii85_encode(*val, dumped));
- }
- drm_puts(p, "\n");
-}
-
-void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
-{
- if (!snapshot)
- return;
-
- kvfree(snapshot->lrc_snapshot);
- if (snapshot->lrc_bo) {
- struct xe_vm *vm;
-
- vm = snapshot->lrc_bo->vm;
- xe_bo_put(snapshot->lrc_bo);
- if (vm)
- xe_vm_put(vm);
- }
- kfree(snapshot);
-}
-
-/**
- * xe_lrc_update_timestamp() - Update ctx timestamp
- * @lrc: Pointer to the lrc.
- * @old_ts: Old timestamp value
- *
- * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and
- * update saved value.
- *
- * Returns: New ctx timestamp value
- */
-u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
-{
- *old_ts = lrc->ctx_timestamp;
-
- lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
-
- return lrc->ctx_timestamp;
-}
diff --git a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.2 b/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.2
deleted file mode 100644
index bee934c9371f..000000000000
--- a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.2
+++ /dev/null
@@ -1,1784 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#include "xe_lrc.h"
-
-#include <generated/xe_wa_oob.h>
-
-#include <linux/ascii85.h>
-
-#include "instructions/xe_mi_commands.h"
-#include "instructions/xe_gfxpipe_commands.h"
-#include "instructions/xe_gfx_state_commands.h"
-#include "regs/xe_engine_regs.h"
-#include "regs/xe_lrc_layout.h"
-#include "xe_bb.h"
-#include "xe_bo.h"
-#include "xe_device.h"
-#include "xe_drm_client.h"
-#include "xe_exec_queue_types.h"
-#include "xe_gt.h"
-#include "xe_gt_printk.h"
-#include "xe_hw_fence.h"
-#include "xe_map.h"
-#include "xe_memirq.h"
-#include "xe_sriov.h"
-#include "xe_vm.h"
-#include "xe_wa.h"
-
-#define LRC_VALID BIT_ULL(0)
-#define LRC_PRIVILEGE BIT_ULL(8)
-#define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3)
-#define LRC_LEGACY_64B_CONTEXT 3
-
-#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61)
-#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
-
-#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
-
-struct xe_lrc_snapshot {
- struct xe_bo *lrc_bo;
- void *lrc_snapshot;
- unsigned long lrc_size, lrc_offset;
-
- u32 context_desc;
- u32 indirect_context_desc;
- u32 head;
- struct {
- u32 internal;
- u32 memory;
- } tail;
- u32 start_seqno;
- u32 seqno;
- u32 ctx_timestamp;
- u32 ctx_job_timestamp;
-};
-
-static struct xe_device *
-lrc_to_xe(struct xe_lrc *lrc)
-{
- return gt_to_xe(lrc->fence_ctx.gt);
-}
-
-size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
-{
- struct xe_device *xe = gt_to_xe(gt);
- size_t size;
-
- switch (class) {
- case XE_ENGINE_CLASS_RENDER:
- if (GRAPHICS_VER(xe) >= 20)
- size = 4 * SZ_4K;
- else
- size = 14 * SZ_4K;
- break;
- case XE_ENGINE_CLASS_COMPUTE:
- /* 14 pages since graphics_ver == 11 */
- if (GRAPHICS_VER(xe) >= 20)
- size = 3 * SZ_4K;
- else
- size = 14 * SZ_4K;
- break;
- default:
- WARN(1, "Unknown engine class: %d", class);
- fallthrough;
- case XE_ENGINE_CLASS_COPY:
- case XE_ENGINE_CLASS_VIDEO_DECODE:
- case XE_ENGINE_CLASS_VIDEO_ENHANCE:
- case XE_ENGINE_CLASS_OTHER:
- size = 2 * SZ_4K;
- }
-
- /* Add indirect ring state page */
- if (xe_gt_has_indirect_ring_state(gt))
- size += LRC_INDIRECT_RING_STATE_SIZE;
-
- return size;
-}
-
-/*
- * The per-platform tables are u8-encoded in @data. Decode @data and set the
- * addresses' offset and commands in @regs. The following encoding is used
- * for each byte. There are 2 steps: decoding commands and decoding addresses.
- *
- * Commands:
- * [7]: create NOPs - number of NOPs are set in lower bits
- * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
- * MI_LRI_FORCE_POSTED
- * [5:0]: Number of NOPs or registers to set values to in case of
- * MI_LOAD_REGISTER_IMM
- *
- * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
- * number of registers. They are set by using the REG/REG16 macros: the former
- * is used for offsets smaller than 0x200 while the latter is for values bigger
- * than that. Those macros already set all the bits documented below correctly:
- *
- * [7]: When a register offset needs more than 6 bits, use additional bytes, to
- * follow, for the lower bits
- * [6:0]: Register offset, without considering the engine base.
- *
- * This function only tweaks the commands and register offsets. Values are not
- * filled out.
- */
-static void set_offsets(u32 *regs,
- const u8 *data,
- const struct xe_hw_engine *hwe)
-#define NOP(x) (BIT(7) | (x))
-#define LRI(count, flags) ((flags) << 6 | (count) | \
- BUILD_BUG_ON_ZERO(count >= BIT(6)))
-#define POSTED BIT(0)
-#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
-#define REG16(x) \
- (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
- (((x) >> 2) & 0x7f)
-{
- const u32 base = hwe->mmio_base;
-
- while (*data) {
- u8 count, flags;
-
- if (*data & BIT(7)) { /* skip */
- count = *data++ & ~BIT(7);
- regs += count;
- continue;
- }
-
- count = *data & 0x3f;
- flags = *data >> 6;
- data++;
-
- *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
- if (flags & POSTED)
- *regs |= MI_LRI_FORCE_POSTED;
- *regs |= MI_LRI_LRM_CS_MMIO;
- regs++;
-
- xe_gt_assert(hwe->gt, count);
- do {
- u32 offset = 0;
- u8 v;
-
- do {
- v = *data++;
- offset <<= 7;
- offset |= v & ~BIT(7);
- } while (v & BIT(7));
-
- regs[0] = base + (offset << 2);
- regs += 2;
- } while (--count);
- }
-
- *regs = MI_BATCH_BUFFER_END | BIT(0);
-}
-
-static const u8 gen12_xcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- 0
-};
-
-static const u8 dg2_xcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- REG(0x120),
- REG(0x124),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- 0
-};
-
-static const u8 gen12_rcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
- NOP(3 + 9 + 1),
-
- LRI(51, POSTED),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG16(0x588),
- REG(0x028),
- REG(0x09c),
- REG(0x0c0),
- REG(0x178),
- REG(0x17c),
- REG16(0x358),
- REG(0x170),
- REG(0x150),
- REG(0x154),
- REG(0x158),
- REG16(0x41c),
- REG16(0x600),
- REG16(0x604),
- REG16(0x608),
- REG16(0x60c),
- REG16(0x610),
- REG16(0x614),
- REG16(0x618),
- REG16(0x61c),
- REG16(0x620),
- REG16(0x624),
- REG16(0x628),
- REG16(0x62c),
- REG16(0x630),
- REG16(0x634),
- REG16(0x638),
- REG16(0x63c),
- REG16(0x640),
- REG16(0x644),
- REG16(0x648),
- REG16(0x64c),
- REG16(0x650),
- REG16(0x654),
- REG16(0x658),
- REG16(0x65c),
- REG16(0x660),
- REG16(0x664),
- REG16(0x668),
- REG16(0x66c),
- REG16(0x670),
- REG16(0x674),
- REG16(0x678),
- REG16(0x67c),
- REG(0x068),
- REG(0x084),
- NOP(1),
-
- 0
-};
-
-static const u8 xehp_rcs_offsets[] = {
- NOP(1),
- LRI(13, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
-
- NOP(5),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- 0
-};
-
-static const u8 dg2_rcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- REG(0x120),
- REG(0x124),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- LRI(3, POSTED),
- REG(0x1b0),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- 0
-};
-
-static const u8 mtl_rcs_offsets[] = {
- NOP(1),
- LRI(15, POSTED),
- REG16(0x244),
- REG(0x034),
- REG(0x030),
- REG(0x038),
- REG(0x03c),
- REG(0x168),
- REG(0x140),
- REG(0x110),
- REG(0x1c0),
- REG(0x1c4),
- REG(0x1c8),
- REG(0x180),
- REG16(0x2b4),
- REG(0x120),
- REG(0x124),
-
- NOP(1),
- LRI(9, POSTED),
- REG16(0x3a8),
- REG16(0x28c),
- REG16(0x288),
- REG16(0x284),
- REG16(0x280),
- REG16(0x27c),
- REG16(0x278),
- REG16(0x274),
- REG16(0x270),
-
- NOP(2),
- LRI(2, POSTED),
- REG16(0x5a8),
- REG16(0x5ac),
-
- NOP(6),
- LRI(1, 0),
- REG(0x0c8),
-
- 0
-};
-
-#define XE2_CTX_COMMON \
- NOP(1), /* [0x00] */ \
- LRI(15, POSTED), /* [0x01] */ \
- REG16(0x244), /* [0x02] CTXT_SR_CTL */ \
- REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \
- REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \
- REG(0x038), /* [0x08] RING_BUFFER_START */ \
- REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \
- REG(0x168), /* [0x0c] BB_ADDR_UDW */ \
- REG(0x140), /* [0x0e] BB_ADDR */ \
- REG(0x110), /* [0x10] BB_STATE */ \
- REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \
- REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \
- REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
- REG(0x180), /* [0x18] CCID */ \
- REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \
- REG(0x120), /* [0x1c] PRT_BB_STATE */ \
- REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \
- \
- NOP(1), /* [0x20] */ \
- LRI(9, POSTED), /* [0x21] */ \
- REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \
- REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \
- REG(0x108), /* [0x26] INDIRECT_RING_STATE */ \
- REG16(0x284), /* [0x28] dummy reg */ \
- REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \
- REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \
- REG16(0x278), /* [0x2e] CS_CTX_ASID */ \
- REG16(0x274), /* [0x30] PTBP_UDW */ \
- REG16(0x270) /* [0x32] PTBP_LDW */
-
-static const u8 xe2_rcs_offsets[] = {
- XE2_CTX_COMMON,
-
- NOP(2), /* [0x34] */
- LRI(2, POSTED), /* [0x36] */
- REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
- REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */
-
- NOP(6), /* [0x41] */
- LRI(1, 0), /* [0x47] */
- REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */
-
- 0
-};
-
-static const u8 xe2_bcs_offsets[] = {
- XE2_CTX_COMMON,
-
- NOP(4 + 8 + 1), /* [0x34] */
- LRI(2, POSTED), /* [0x41] */
- REG16(0x200), /* [0x42] BCS_SWCTRL */
- REG16(0x204), /* [0x44] BLIT_CCTL */
-
- 0
-};
-
-static const u8 xe2_xcs_offsets[] = {
- XE2_CTX_COMMON,
-
- 0
-};
-
-static const u8 xe2_indirect_ring_state_offsets[] = {
- NOP(1), /* [0x00] */
- LRI(5, POSTED), /* [0x01] */
- REG(0x034), /* [0x02] RING_BUFFER_HEAD */
- REG(0x030), /* [0x04] RING_BUFFER_TAIL */
- REG(0x038), /* [0x06] RING_BUFFER_START */
- REG(0x048), /* [0x08] RING_BUFFER_START_UDW */
- REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */
-
- NOP(5), /* [0x0c] */
- LRI(9, POSTED), /* [0x11] */
- REG(0x168), /* [0x12] BB_ADDR_UDW */
- REG(0x140), /* [0x14] BB_ADDR */
- REG(0x110), /* [0x16] BB_STATE */
- REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */
- REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */
-
- NOP(12), /* [0x00] */
-
- 0
-};
-
-#undef REG16
-#undef REG
-#undef LRI
-#undef NOP
-
-static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
-{
- if (class == XE_ENGINE_CLASS_RENDER) {
- if (GRAPHICS_VER(xe) >= 20)
- return xe2_rcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1270)
- return mtl_rcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1255)
- return dg2_rcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1250)
- return xehp_rcs_offsets;
- else
- return gen12_rcs_offsets;
- } else if (class == XE_ENGINE_CLASS_COPY) {
- if (GRAPHICS_VER(xe) >= 20)
- return xe2_bcs_offsets;
- else
- return gen12_xcs_offsets;
- } else {
- if (GRAPHICS_VER(xe) >= 20)
- return xe2_xcs_offsets;
- else if (GRAPHICS_VERx100(xe) >= 1255)
- return dg2_xcs_offsets;
- else
- return gen12_xcs_offsets;
- }
-}
-
-static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
-{
- regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
- CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
-
- if (xe_gt_has_indirect_ring_state(hwe->gt))
- regs[CTX_CONTEXT_CONTROL] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);
-
- /* TODO: Timestamp */
-}
-
-static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
-{
- struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;
- struct xe_device *xe = gt_to_xe(hwe->gt);
-
- if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
- return;
-
- regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
- MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
- regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
- regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
-
- regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
- MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
- regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
- regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
- regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
- regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
-}
-
-static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
-{
- struct xe_device *xe = gt_to_xe(hwe->gt);
-
- if (GRAPHICS_VERx100(xe) >= 1250)
- return 0x70;
- else
- return 0x60;
-}
-
-static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
-{
- int x;
-
- x = lrc_ring_mi_mode(hwe);
- regs[x + 1] &= ~STOP_RING;
- regs[x + 1] |= STOP_RING << 16;
-}
-
-static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
-{
- return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE;
-}
-
-static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
-{
- return 0;
-}
-
-u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
-{
- return lrc->ring.size;
-}
-
-/* Make the magic macros work */
-#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
-#define __xe_lrc_regs_offset xe_lrc_regs_offset
-
-#define LRC_SEQNO_PPHWSP_OFFSET 512
-#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
-#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
-#define LRC_PARALLEL_PPHWSP_OFFSET 2048
-#define LRC_PPHWSP_SIZE SZ_4K
-
-u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
-{
- return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
-}
-
-static size_t lrc_reg_size(struct xe_device *xe)
-{
- if (GRAPHICS_VERx100(xe) >= 1250)
- return 96 * sizeof(u32);
- else
- return 80 * sizeof(u32);
-}
-
-size_t xe_lrc_skip_size(struct xe_device *xe)
-{
- return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
-}
-
-static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
-{
- /* The seqno is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET;
-}
-
-static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
-{
- /* The start seqno is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET;
-}
-
-static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
-{
- /* The start seqno is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
-}
-
-static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
-{
- /* The parallel is stored in the driver-defined portion of PPHWSP */
- return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
-}
-
-static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
-{
- return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
-}
-
-static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
-{
- /* Indirect ring state page is at the very end of LRC */
- return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
-}
-
-#define DECL_MAP_ADDR_HELPERS(elem) \
-static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
-{ \
- struct iosys_map map = lrc->bo->vmap; \
-\
- xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
- iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
- return map; \
-} \
-static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
-{ \
- return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
-} \
-
-DECL_MAP_ADDR_HELPERS(ring)
-DECL_MAP_ADDR_HELPERS(pphwsp)
-DECL_MAP_ADDR_HELPERS(seqno)
-DECL_MAP_ADDR_HELPERS(regs)
-DECL_MAP_ADDR_HELPERS(start_seqno)
-DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
-DECL_MAP_ADDR_HELPERS(ctx_timestamp)
-DECL_MAP_ADDR_HELPERS(parallel)
-DECL_MAP_ADDR_HELPERS(indirect_ring)
-
-#undef DECL_MAP_ADDR_HELPERS
-
-/**
- * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp GGTT address
- */
-u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
-}
-
-/**
- * xe_lrc_ctx_timestamp() - Read ctx timestamp value
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp value
- */
-u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_ctx_timestamp_map(lrc);
- return xe_map_read32(xe, &map);
-}
-
-/**
- * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp job GGTT address
- */
-u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
-}
-
-/**
- * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
- * @lrc: Pointer to the lrc.
- *
- * Returns: ctx timestamp job value
- */
-u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_ctx_job_timestamp_map(lrc);
- return xe_map_read32(xe, &map);
-}
-
-u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_pphwsp_ggtt_addr(lrc);
-}
-
-u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
-{
- if (!xe_lrc_has_indirect_ring_state(lrc))
- return 0;
-
- return __xe_lrc_indirect_ring_ggtt_addr(lrc);
-}
-
-static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_indirect_ring_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- return xe_map_read32(xe, &map);
-}
-
-static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
- int reg_nr, u32 val)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_indirect_ring_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- xe_map_write32(xe, &map, val);
-}
-
-u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_regs_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- return xe_map_read32(xe, &map);
-}
-
-void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map map;
-
- map = __xe_lrc_regs_map(lrc);
- iosys_map_incr(&map, reg_nr * sizeof(u32));
- xe_map_write32(xe, &map, val);
-}
-
-static void *empty_lrc_data(struct xe_hw_engine *hwe)
-{
- struct xe_gt *gt = hwe->gt;
- void *data;
- u32 *regs;
-
- data = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
- if (!data)
- return NULL;
-
- /* 1st page: Per-Process of HW status Page */
- regs = data + LRC_PPHWSP_SIZE;
- set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
- set_context_control(regs, hwe);
- set_memory_based_intr(regs, hwe);
- reset_stop_ring(regs, hwe);
- if (xe_gt_has_indirect_ring_state(gt)) {
- regs = data + xe_gt_lrc_size(gt, hwe->class) -
- LRC_INDIRECT_RING_STATE_SIZE;
- set_offsets(regs, xe2_indirect_ring_state_offsets, hwe);
- }
-
- return data;
-}
-
-static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
-{
- u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
-
- xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
- xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
-}
-
-static void xe_lrc_finish(struct xe_lrc *lrc)
-{
- xe_hw_fence_ctx_finish(&lrc->fence_ctx);
- xe_bo_lock(lrc->bo, false);
- xe_bo_unpin(lrc->bo);
- xe_bo_unlock(lrc->bo);
- xe_bo_put(lrc->bo);
-}
-
-#define PVC_CTX_ASID (0x2e + 1)
-#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
-
-static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
- struct xe_vm *vm, u32 ring_size)
-{
- struct xe_gt *gt = hwe->gt;
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_device *xe = gt_to_xe(gt);
- struct iosys_map map;
- void *init_data = NULL;
- u32 arb_enable;
- u32 lrc_size;
- int err;
-
- kref_init(&lrc->refcount);
- lrc->flags = 0;
- lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
- if (xe_gt_has_indirect_ring_state(gt))
- lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
-
- /*
- * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
- * via VM bind calls.
- */
- lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
- if (IS_ERR(lrc->bo))
- return PTR_ERR(lrc->bo);
-
- lrc->size = lrc_size;
- lrc->tile = gt_to_tile(hwe->gt);
- lrc->ring.size = ring_size;
- lrc->ring.tail = 0;
- lrc->ctx_timestamp = 0;
-
- xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
- hwe->fence_irq, hwe->name);
-
- if (!gt->default_lrc[hwe->class]) {
- init_data = empty_lrc_data(hwe);
- if (!init_data) {
- err = -ENOMEM;
- goto err_lrc_finish;
- }
- }
-
- /*
- * Init Per-Process of HW status Page, LRC / context state to known
- * values
- */
- map = __xe_lrc_pphwsp_map(lrc);
- if (!init_data) {
- xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
- xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
- gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
- xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
- } else {
- xe_map_memcpy_to(xe, &map, 0, init_data,
- xe_gt_lrc_size(gt, hwe->class));
- kfree(init_data);
- }
-
- if (vm) {
- xe_lrc_set_ppgtt(lrc, vm);
-
- if (vm->xef)
- xe_drm_client_add_bo(vm->xef->client, lrc->bo);
- }
-
- if (xe_gt_has_indirect_ring_state(gt)) {
- xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
- __xe_lrc_indirect_ring_ggtt_addr(lrc));
-
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
- __xe_lrc_ring_ggtt_addr(lrc));
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
- RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
- } else {
- xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
- xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
- xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
- xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
- RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
- }
-
- xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
-
- if (xe->info.has_asid && vm)
- xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
-
- lrc->desc = LRC_VALID;
- lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
- /* TODO: Priority */
-
- /* While this appears to have something about privileged batches or
- * some such, it really just means PPGTT mode.
- */
- if (vm)
- lrc->desc |= LRC_PRIVILEGE;
-
- if (GRAPHICS_VERx100(xe) < 1250) {
- lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
- lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
- }
-
- arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
-
- map = __xe_lrc_seqno_map(lrc);
- xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
-
- map = __xe_lrc_start_seqno_map(lrc);
- xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
-
- return 0;
-
-err_lrc_finish:
- xe_lrc_finish(lrc);
- return err;
-}
-
-/**
- * xe_lrc_create - Create a LRC
- * @hwe: Hardware Engine
- * @vm: The VM (address space)
- * @ring_size: LRC ring size
- *
- * Allocate and initialize the Logical Ring Context (LRC).
- *
- * Return pointer to created LRC upon success and an error pointer
- * upon failure.
- */
-struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
- u32 ring_size)
-{
- struct xe_lrc *lrc;
- int err;
-
- lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
- if (!lrc)
- return ERR_PTR(-ENOMEM);
-
- err = xe_lrc_init(lrc, hwe, vm, ring_size);
- if (err) {
- kfree(lrc);
- return ERR_PTR(err);
- }
-
- return lrc;
-}
-
-/**
- * xe_lrc_destroy - Destroy the LRC
- * @ref: reference to LRC
- *
- * Called when ref == 0, release resources held by the Logical Ring Context
- * (LRC) and free the LRC memory.
- */
-void xe_lrc_destroy(struct kref *ref)
-{
- struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);
-
- xe_lrc_finish(lrc);
- kfree(lrc);
-}
-
-void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
- else
- xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
-}
-
-u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR;
- else
- return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR;
-}
-
-void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
- else
- xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
-}
-
-u32 xe_lrc_ring_head(struct xe_lrc *lrc)
-{
- if (xe_lrc_has_indirect_ring_state(lrc))
- return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR;
- else
- return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR;
-}
-
-u32 xe_lrc_ring_space(struct xe_lrc *lrc)
-{
- const u32 head = xe_lrc_ring_head(lrc);
- const u32 tail = lrc->ring.tail;
- const u32 size = lrc->ring.size;
-
- return ((head - tail - 1) & (size - 1)) + 1;
-}
-
-static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
- const void *data, size_t size)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
-
- iosys_map_incr(&ring, lrc->ring.tail);
- xe_map_memcpy_to(xe, &ring, 0, data, size);
- lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1);
-}
-
-void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
-{
- struct xe_device *xe = lrc_to_xe(lrc);
- struct iosys_map ring;
- u32 rhs;
- size_t aligned_size;
-
- xe_assert(xe, IS_ALIGNED(size, 4));
- aligned_size = ALIGN(size, 8);
-
- ring = __xe_lrc_ring_map(lrc);
-
- xe_assert(xe, lrc->ring.tail < lrc->ring.size);
- rhs = lrc->ring.size - lrc->ring.tail;
- if (size > rhs) {
- __xe_lrc_write_ring(lrc, ring, data, rhs);
- __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs);
- } else {
- __xe_lrc_write_ring(lrc, ring, data, size);
- }
-
- if (aligned_size > size) {
- u32 noop = MI_NOOP;
-
- __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop));
- }
-}
-
-u64 xe_lrc_descriptor(struct xe_lrc *lrc)
-{
- return lrc->desc | xe_lrc_ggtt_addr(lrc);
-}
-
-u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_seqno_ggtt_addr(lrc);
-}
-
-/**
- * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
- *
- * Allocate but don't initialize an lrc seqno fence.
- *
- * Return: Pointer to the allocated fence or
- * negative error pointer on error.
- */
-struct dma_fence *xe_lrc_alloc_seqno_fence(void)
-{
- return xe_hw_fence_alloc();
-}
-
-/**
- * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
- * @fence: Pointer to the fence to free.
- *
- * Frees an lrc seqno fence that hasn't yet been
- * initialized.
- */
-void xe_lrc_free_seqno_fence(struct dma_fence *fence)
-{
- xe_hw_fence_free(fence);
-}
-
-/**
- * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
- * @lrc: Pointer to the lrc.
- * @fence: Pointer to the fence to initialize.
- *
- * Initializes a pre-allocated lrc seqno fence.
- * After initialization, the fence is subject to normal
- * dma-fence refcounting.
- */
-void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
-{
- xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
-}
-
-s32 xe_lrc_seqno(struct xe_lrc *lrc)
-{
- struct iosys_map map = __xe_lrc_seqno_map(lrc);
-
- return xe_map_read32(lrc_to_xe(lrc), &map);
-}
-
-s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
-{
- struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
-
- return xe_map_read32(lrc_to_xe(lrc), &map);
-}
-
-u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_start_seqno_ggtt_addr(lrc);
-}
-
-u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
-{
- return __xe_lrc_parallel_ggtt_addr(lrc);
-}
-
-struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
-{
- return __xe_lrc_parallel_map(lrc);
-}
-
-static int instr_dw(u32 cmd_header)
-{
- /* GFXPIPE "SINGLE_DW" opcodes are a single dword */
- if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) ==
- GFXPIPE_SINGLE_DW_CMD(0, 0))
- return 1;
-
- /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
- if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
- return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2;
-
- /* Most instructions have the # of dwords (minus 2) in 7:0 */
- return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2;
-}
-
-static int dump_mi_command(struct drm_printer *p,
- struct xe_gt *gt,
- u32 *dw,
- int remaining_dw)
-{
- u32 inst_header = *dw;
- u32 numdw = instr_dw(inst_header);
- u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
- int num_noop;
-
- /* First check for commands that don't have/use a '# DW' field */
- switch (inst_header & MI_OPCODE) {
- case MI_NOOP:
- num_noop = 1;
- while (num_noop < remaining_dw &&
- (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
- num_noop++;
- drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
- return num_noop;
-
- case MI_TOPOLOGY_FILTER:
- drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
- return 1;
-
- case MI_BATCH_BUFFER_END:
- drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
- /* Return 'remaining_dw' to consume the rest of the LRC */
- return remaining_dw;
- }
-
- /*
- * Any remaining commands include a # of dwords. We should make sure
- * it doesn't exceed the remaining size of the LRC.
- */
- if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
- numdw = remaining_dw;
-
- switch (inst_header & MI_OPCODE) {
- case MI_LOAD_REGISTER_IMM:
- drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
- inst_header, (numdw - 1) / 2);
- for (int i = 1; i < numdw; i += 2)
- drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
- return numdw;
-
- case MI_LOAD_REGISTER_MEM & MI_OPCODE:
- drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
- inst_header,
- dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
- dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
- if (numdw == 4)
- drm_printf(p, " - %#6x = %#010llx\n",
- dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
- else
- drm_printf(p, " - %*ph (%s)\n",
- (int)sizeof(u32) * (numdw - 1), dw + 1,
- numdw < 4 ? "truncated" : "malformed");
- return numdw;
-
- case MI_FORCE_WAKEUP:
- drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
- return numdw;
-
- default:
- drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
- inst_header, opcode, numdw);
- return numdw;
- }
-}
-
-static int dump_gfxpipe_command(struct drm_printer *p,
- struct xe_gt *gt,
- u32 *dw,
- int remaining_dw)
-{
- u32 numdw = instr_dw(*dw);
- u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
- u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
- u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);
-
- /*
- * Make sure we haven't mis-parsed a number of dwords that exceeds the
- * remaining size of the LRC.
- */
- if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
- numdw = remaining_dw;
-
- switch (*dw & GFXPIPE_MATCH_MASK) {
-#define MATCH(cmd) \
- case cmd: \
- drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
- return numdw
-#define MATCH3D(cmd) \
- case CMD_##cmd: \
- drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
- return numdw
-
- MATCH(STATE_BASE_ADDRESS);
- MATCH(STATE_SIP);
- MATCH(GPGPU_CSR_BASE_ADDRESS);
- MATCH(STATE_COMPUTE_MODE);
- MATCH3D(3DSTATE_BTD);
- MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
- MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);
-
- MATCH3D(3DSTATE_VF_STATISTICS);
-
- MATCH(PIPELINE_SELECT);
-
- MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
- MATCH3D(3DSTATE_CLEAR_PARAMS);
- MATCH3D(3DSTATE_DEPTH_BUFFER);
- MATCH3D(3DSTATE_STENCIL_BUFFER);
- MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
- MATCH3D(3DSTATE_VERTEX_BUFFERS);
- MATCH3D(3DSTATE_VERTEX_ELEMENTS);
- MATCH3D(3DSTATE_INDEX_BUFFER);
- MATCH3D(3DSTATE_VF);
- MATCH3D(3DSTATE_MULTISAMPLE);
- MATCH3D(3DSTATE_CC_STATE_POINTERS);
- MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
- MATCH3D(3DSTATE_VS);
- MATCH3D(3DSTATE_GS);
- MATCH3D(3DSTATE_CLIP);
- MATCH3D(3DSTATE_SF);
- MATCH3D(3DSTATE_WM);
- MATCH3D(3DSTATE_CONSTANT_VS);
- MATCH3D(3DSTATE_CONSTANT_GS);
- MATCH3D(3DSTATE_CONSTANT_PS);
- MATCH3D(3DSTATE_SAMPLE_MASK);
- MATCH3D(3DSTATE_CONSTANT_HS);
- MATCH3D(3DSTATE_CONSTANT_DS);
- MATCH3D(3DSTATE_HS);
- MATCH3D(3DSTATE_TE);
- MATCH3D(3DSTATE_DS);
- MATCH3D(3DSTATE_STREAMOUT);
- MATCH3D(3DSTATE_SBE);
- MATCH3D(3DSTATE_PS);
- MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
- MATCH3D(3DSTATE_CPS_POINTERS);
- MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
- MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
- MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
- MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
- MATCH3D(3DSTATE_VF_INSTANCING);
- MATCH3D(3DSTATE_VF_SGVS);
- MATCH3D(3DSTATE_VF_TOPOLOGY);
- MATCH3D(3DSTATE_WM_CHROMAKEY);
- MATCH3D(3DSTATE_PS_BLEND);
- MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
- MATCH3D(3DSTATE_PS_EXTRA);
- MATCH3D(3DSTATE_RASTER);
- MATCH3D(3DSTATE_SBE_SWIZ);
- MATCH3D(3DSTATE_WM_HZ_OP);
- MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
- MATCH3D(3DSTATE_VF_SGVS_2);
- MATCH3D(3DSTATE_VFG);
- MATCH3D(3DSTATE_URB_ALLOC_VS);
- MATCH3D(3DSTATE_URB_ALLOC_HS);
- MATCH3D(3DSTATE_URB_ALLOC_DS);
- MATCH3D(3DSTATE_URB_ALLOC_GS);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
- MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
- MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
- MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
- MATCH3D(3DSTATE_AMFS);
- MATCH3D(3DSTATE_DEPTH_BOUNDS);
- MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
- MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
- MATCH3D(3DSTATE_MESH_CONTROL);
- MATCH3D(3DSTATE_MESH_DISTRIB);
- MATCH3D(3DSTATE_TASK_REDISTRIB);
- MATCH3D(3DSTATE_MESH_SHADER);
- MATCH3D(3DSTATE_MESH_SHADER_DATA);
- MATCH3D(3DSTATE_TASK_CONTROL);
- MATCH3D(3DSTATE_TASK_SHADER);
- MATCH3D(3DSTATE_TASK_SHADER_DATA);
- MATCH3D(3DSTATE_URB_ALLOC_MESH);
- MATCH3D(3DSTATE_URB_ALLOC_TASK);
- MATCH3D(3DSTATE_CLIP_MESH);
- MATCH3D(3DSTATE_SBE_MESH);
- MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
-
- MATCH3D(3DSTATE_DRAWING_RECTANGLE);
- MATCH3D(3DSTATE_CHROMA_KEY);
- MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
- MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
- MATCH3D(3DSTATE_LINE_STIPPLE);
- MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
- MATCH3D(3DSTATE_MONOFILTER_SIZE);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
- MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
- MATCH3D(3DSTATE_SO_DECL_LIST);
- MATCH3D(3DSTATE_SO_BUFFER);
- MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
- MATCH3D(3DSTATE_SAMPLE_PATTERN);
- MATCH3D(3DSTATE_3D_MODE);
- MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
- MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
- MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);
-
- default:
- drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
- *dw, pipeline, opcode, subopcode, numdw);
- return numdw;
- }
-}
-
-static int dump_gfx_state_command(struct drm_printer *p,
- struct xe_gt *gt,
- u32 *dw,
- int remaining_dw)
-{
- u32 numdw = instr_dw(*dw);
- u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);
-
- /*
- * Make sure we haven't mis-parsed a number of dwords that exceeds the
- * remaining size of the LRC.
- */
- if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
- numdw = remaining_dw;
-
- switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
- MATCH(STATE_WRITE_INLINE);
-
- default:
- drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
- *dw, opcode, numdw);
- return numdw;
- }
-}
-
-void xe_lrc_dump_default(struct drm_printer *p,
- struct xe_gt *gt,
- enum xe_engine_class hwe_class)
-{
- u32 *dw;
- int remaining_dw, num_dw;
-
- if (!gt->default_lrc[hwe_class]) {
- drm_printf(p, "No default LRC for class %d\n", hwe_class);
- return;
- }
-
- /*
- * Skip the beginning of the LRC since it contains the per-process
- * hardware status page.
- */
- dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
- remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;
-
- while (remaining_dw > 0) {
- if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
- num_dw = dump_mi_command(p, gt, dw, remaining_dw);
- } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
- num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
- } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
- num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
- } else {
- num_dw = min(instr_dw(*dw), remaining_dw);
- drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
- *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
- num_dw);
- }
-
- dw += num_dw;
- remaining_dw -= num_dw;
- }
-}
-
-struct instr_state {
- u32 instr;
- u16 num_dw;
-};
-
-static const struct instr_state xe_hpg_svg_state[] = {
- { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
- { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
- { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
- { .instr = CMD_3DSTATE_VS, .num_dw = 9 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
- { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
- { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
- { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
- { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SF, .num_dw = 4 },
- { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
- { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
- { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
- { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
- { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
- { .instr = CMD_3DSTATE_HS, .num_dw = 9 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
- { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
- { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
- { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
- { .instr = CMD_3DSTATE_TE, .num_dw = 5 },
- { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
- { .instr = CMD_3DSTATE_DS, .num_dw = 11 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_GS, .num_dw = 10 },
- { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
- { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
- { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
- { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
- { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
- { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
- { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
-};
-
-void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
-{
- struct xe_gt *gt = q->hwe->gt;
- struct xe_device *xe = gt_to_xe(gt);
- const struct instr_state *state_table = NULL;
- int state_table_size = 0;
-
- /*
- * Wa_14019789679
- *
- * If the driver doesn't explicitly emit the SVG instructions while
- * setting up the default LRC, the context switch will write 0's
- * (noops) into the LRC memory rather than the expected instruction
- * headers. Application contexts start out as a copy of the default
- * LRC, and if they also do not emit specific settings for some SVG
- * state, then on context restore they'll unintentionally inherit
- * whatever state setting the previous context had programmed into the
- * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
- * prevent the hardware from resetting that state back to any specific
- * value).
- *
- * The official workaround only requires emitting 3DSTATE_MESH_CONTROL
- * since that's a specific state setting that can easily cause GPU
- * hangs if unintentionally inherited. However to be safe we'll
- * continue to emit all of the SVG state since it's best not to leak
- * any of the state between contexts, even if that leakage is harmless.
- */
- if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
- state_table = xe_hpg_svg_state;
- state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
- }
-
- if (!state_table) {
- xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
- GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
- return;
- }
-
- for (int i = 0; i < state_table_size; i++) {
- u32 instr = state_table[i].instr;
- u16 num_dw = state_table[i].num_dw;
- bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
-
- xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
- xe_gt_assert(gt, num_dw != 0);
- xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
-
- /*
- * Xe2's SVG context is the same as the one on DG2 / MTL
- * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
- * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
- * Just make the replacement here rather than defining a
- * whole separate table for the single trivial change.
- */
- if (GRAPHICS_VER(xe) >= 20 &&
- instr == CMD_3DSTATE_DRAWING_RECTANGLE)
- instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
-
- bb->cs[bb->len] = instr;
- if (!is_single_dw)
- bb->cs[bb->len] |= (num_dw - 2);
-
- bb->len += num_dw;
- }
-}
-
-struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
-{
- struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);
-
- if (!snapshot)
- return NULL;
-
-<<<<<<<
- if (lrc->bo && lrc->bo->vm)
-=======
- if (lrc->bo->vm)
->>>>>>>
- xe_vm_get(lrc->bo->vm);
-
- snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
- snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
- snapshot->head = xe_lrc_ring_head(lrc);
- snapshot->tail.internal = lrc->ring.tail;
- snapshot->tail.memory = xe_lrc_ring_tail(lrc);
- snapshot->start_seqno = xe_lrc_start_seqno(lrc);
- snapshot->seqno = xe_lrc_seqno(lrc);
- snapshot->lrc_bo = xe_bo_get(lrc->bo);
- snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
- snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
- snapshot->lrc_snapshot = NULL;
- snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
- snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
- return snapshot;
-}
-
-void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
-{
- struct xe_bo *bo;
- struct xe_vm *vm;
- struct iosys_map src;
-
- if (!snapshot)
- return;
-
- bo = snapshot->lrc_bo;
- vm = bo->vm;
- snapshot->lrc_bo = NULL;
-
- snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
- if (!snapshot->lrc_snapshot)
- goto put_bo;
-
- xe_bo_lock(bo, false);
- if (!ttm_bo_vmap(&bo->ttm, &src)) {
- xe_map_memcpy_from(xe_bo_device(bo),
- snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
- snapshot->lrc_size);
- ttm_bo_vunmap(&bo->ttm, &src);
- } else {
- kvfree(snapshot->lrc_snapshot);
- snapshot->lrc_snapshot = NULL;
- }
- xe_bo_unlock(bo);
-put_bo:
- xe_bo_put(bo);
- if (vm)
- xe_vm_put(vm);
-}
-
-void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
-{
- unsigned long i;
-
- if (!snapshot)
- return;
-
- drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
- drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
- snapshot->indirect_context_desc);
- drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
- drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
- snapshot->tail.internal, snapshot->tail.memory);
- drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
- drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
- drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
- drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);
-
- if (!snapshot->lrc_snapshot)
- return;
-
- drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
- drm_puts(p, "\t[HWSP].data: ");
- for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
- u32 *val = snapshot->lrc_snapshot + i;
- char dumped[ASCII85_BUFSZ];
-
- drm_puts(p, ascii85_encode(*val, dumped));
- }
-
- drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
- drm_puts(p, "\t[HWCTX].data: ");
- for (; i < snapshot->lrc_size; i += sizeof(u32)) {
- u32 *val = snapshot->lrc_snapshot + i;
- char dumped[ASCII85_BUFSZ];
-
- drm_puts(p, ascii85_encode(*val, dumped));
- }
- drm_puts(p, "\n");
-}
-
-void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
-{
- if (!snapshot)
- return;
-
- kvfree(snapshot->lrc_snapshot);
- if (snapshot->lrc_bo) {
- struct xe_vm *vm;
-
- vm = snapshot->lrc_bo->vm;
- xe_bo_put(snapshot->lrc_bo);
- if (vm)
- xe_vm_put(vm);
- }
- kfree(snapshot);
-}
-
-/**
- * xe_lrc_update_timestamp() - Update ctx timestamp
- * @lrc: Pointer to the lrc.
- * @old_ts: Old timestamp value
- *
- * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and
- * update saved value.
- *
- * Returns: New ctx timestamp value
- */
-u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
-{
- *old_ts = lrc->ctx_timestamp;
-
- lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
-
- return lrc->ctx_timestamp;
-}
diff --git a/rr-cache/641dfaef393e85daf275c6725aed0c69ef5046ad/preimage b/rr-cache/641dfaef393e85daf275c6725aed0c69ef5046ad/preimage
new file mode 100644
index 000000000000..be64058af509
--- /dev/null
+++ b/rr-cache/641dfaef393e85daf275c6725aed0c69ef5046ad/preimage
@@ -0,0 +1,1089 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 Broadcom
+ */
+
+/**
+ * DOC: VC4 HVS module.
+ *
+ * The Hardware Video Scaler (HVS) is the piece of hardware that does
+ * translation, scaling, colorspace conversion, and compositing of
+ * pixels stored in framebuffers into a FIFO of pixels going out to
+ * the Pixel Valve (CRTC). It operates at the system clock rate (the
+ * system audio clock gate, specifically), which is much higher than
+ * the pixel clock rate.
+ *
+ * There is a single global HVS, with multiple output FIFOs that can
+ * be consumed by the PVs. This file just manages the resources for
+ * the HVS, while the vc4_crtc.c code actually drives HVS setup for
+ * each CRTC.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/platform_device.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_vblank.h>
+
+#include <soc/bcm2835/raspberrypi-firmware.h>
+
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+/*
+ * Registers captured in hvs->regset: printed by vc4_hvs_dump_state() and
+ * registered with debugfs under the name "hvs_regs".
+ */
+static const struct debugfs_reg32 hvs_regs[] = {
+	VC4_REG32(SCALER_DISPCTRL),
+	VC4_REG32(SCALER_DISPSTAT),
+	VC4_REG32(SCALER_DISPID),
+	VC4_REG32(SCALER_DISPECTRL),
+	VC4_REG32(SCALER_DISPPROF),
+	VC4_REG32(SCALER_DISPDITHER),
+	VC4_REG32(SCALER_DISPEOLN),
+	VC4_REG32(SCALER_DISPLIST0),
+	VC4_REG32(SCALER_DISPLIST1),
+	VC4_REG32(SCALER_DISPLIST2),
+	VC4_REG32(SCALER_DISPLSTAT),
+	VC4_REG32(SCALER_DISPLACT0),
+	VC4_REG32(SCALER_DISPLACT1),
+	VC4_REG32(SCALER_DISPLACT2),
+	VC4_REG32(SCALER_DISPCTRL0),
+	VC4_REG32(SCALER_DISPBKGND0),
+	VC4_REG32(SCALER_DISPSTAT0),
+	VC4_REG32(SCALER_DISPBASE0),
+	VC4_REG32(SCALER_DISPCTRL1),
+	VC4_REG32(SCALER_DISPBKGND1),
+	VC4_REG32(SCALER_DISPSTAT1),
+	VC4_REG32(SCALER_DISPBASE1),
+	VC4_REG32(SCALER_DISPCTRL2),
+	VC4_REG32(SCALER_DISPBKGND2),
+	VC4_REG32(SCALER_DISPSTAT2),
+	VC4_REG32(SCALER_DISPBASE2),
+	VC4_REG32(SCALER_DISPALPHA2),
+	VC4_REG32(SCALER_OLEDOFFS),
+	VC4_REG32(SCALER_OLEDCOEF0),
+	VC4_REG32(SCALER_OLEDCOEF1),
+	VC4_REG32(SCALER_OLEDCOEF2),
+};
+
+/*
+ * Log the HVS register set followed by the first 64 words of display-list
+ * memory, four words per line. A line is tagged "B" when its first word
+ * index is below HVS_BOOTLOADER_DLIST_END and "D" otherwise.
+ *
+ * Does nothing if the DRM device can no longer be entered.
+ */
+void vc4_hvs_dump_state(struct vc4_hvs *hvs)
+{
+	struct drm_printer printer = drm_info_printer(&hvs->pdev->dev);
+	int word;
+	int idx;
+
+	if (!drm_dev_enter(&hvs->vc4->base, &idx))
+		return;
+
+	drm_print_regset32(&printer, &hvs->regset);
+
+	DRM_INFO("HVS ctx:\n");
+	for (word = 0; word < 64; word += 4) {
+		u32 __iomem *row = (u32 __iomem *)hvs->dlist + word;
+		const char *tag = word < HVS_BOOTLOADER_DLIST_END ? "B" : "D";
+
+		DRM_INFO("0x%08x (%s): 0x%08x 0x%08x 0x%08x 0x%08x\n",
+			 word * 4, tag,
+			 readl(row + 0),
+			 readl(row + 1),
+			 readl(row + 2),
+			 readl(row + 3));
+	}
+
+	drm_dev_exit(idx);
+}
+
+/* debugfs show callback: print the current value of the underrun counter. */
+static int vc4_hvs_debugfs_underrun(struct seq_file *m, void *data)
+{
+	struct drm_debugfs_entry *entry = m->private;
+	struct vc4_dev *vc4 = to_vc4_dev(entry->dev);
+	struct drm_printer printer = drm_seq_file_printer(m);
+	int count = atomic_read(&vc4->underrun);
+
+	drm_printf(&printer, "%d\n", count);
+
+	return 0;
+}
+
+/*
+ * debugfs show callback: dump the display list of every HVS channel.
+ *
+ * Channels whose DISPSTATX mode field reads DISABLED or EOF are reported as
+ * disabled. For the others, words are printed starting at the index held in
+ * SCALER_DISPLISTX until an entry header with SCALER_CTL0_END is found or
+ * word index 256 is reached.
+ *
+ * Always returns 0.
+ */
+static int vc4_hvs_debugfs_dlist(struct seq_file *m, void *data)
+{
+	struct drm_debugfs_entry *entry = m->private;
+	struct drm_device *dev = entry->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_hvs *hvs = vc4->hvs;
+	struct drm_printer p = drm_seq_file_printer(m);
+	unsigned int next_entry_start;
+	unsigned int i, j;
+	u32 dlist_word, dispstat;
+
+	for (i = 0; i < SCALER_CHANNELS_COUNT; i++) {
+		dispstat = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTATX(i)),
+					 SCALER_DISPSTATX_MODE);
+		if (dispstat == SCALER_DISPSTATX_MODE_DISABLED ||
+		    dispstat == SCALER_DISPSTATX_MODE_EOF) {
+			drm_printf(&p, "HVS chan %u disabled\n", i);
+			continue;
+		}
+
+		drm_printf(&p, "HVS chan %u:\n", i);
+
+		/*
+		 * Start header tracking afresh for every channel. A value
+		 * left over from the previous channel would make the first
+		 * word of this list be treated as payload, so its END flag
+		 * and size field would be ignored.
+		 */
+		next_entry_start = 0;
+
+		for (j = HVS_READ(SCALER_DISPLISTX(i)); j < 256; j++) {
+			dlist_word = readl((u32 __iomem *)hvs->dlist + j);
+			drm_printf(&p, "dlist: %02d: 0x%08x\n", j,
+				   dlist_word);
+
+			/* Only entry headers carry the END flag and size. */
+			if (!next_entry_start ||
+			    next_entry_start == j) {
+				if (dlist_word & SCALER_CTL0_END)
+					break;
+				next_entry_start = j +
+					VC4_GET_FIELD(dlist_word,
+						      SCALER_CTL0_SIZE);
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* The filter kernel is composed of dwords each containing 3 9-bit
+ * signed integers packed next to each other.
+ */
+#define VC4_INT_TO_COEFF(coeff) ((coeff) & 0x1ff)
+#define VC4_PPF_FILTER_WORD(c0, c1, c2)				\
+	((((c0) & 0x1ff) << 0) |				\
+	 (((c1) & 0x1ff) << 9) |				\
+	 (((c2) & 0x1ff) << 18))
+
+/* The whole filter kernel is arranged as the coefficients 0-16 going
+ * up, then a pad, then 17-31 going down and reversed within the
+ * dwords.  This means that a linear phase kernel (where it's
+ * symmetrical at the boundary between 15 and 16) has the last 5
+ * dwords matching the first 5, but reversed.
+ */
+#define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8,	\
+				c9, c10, c11, c12, c13, c14, c15)	\
+	{VC4_PPF_FILTER_WORD(c0, c1, c2),				\
+	 VC4_PPF_FILTER_WORD(c3, c4, c5),				\
+	 VC4_PPF_FILTER_WORD(c6, c7, c8),				\
+	 VC4_PPF_FILTER_WORD(c9, c10, c11),				\
+	 VC4_PPF_FILTER_WORD(c12, c13, c14),				\
+	 VC4_PPF_FILTER_WORD(c15, c15, 0)}
+
+/* Dwords stored for a linear phase kernel, and dwords uploaded to the
+ * hardware once the first five are mirrored after the sixth.
+ */
+#define VC4_LINEAR_PHASE_KERNEL_DWORDS 6
+#define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1)
+
+/* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali.
+ * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf
+ */
+static const u32 mitchell_netravali_1_3_1_3_kernel[] =
+	VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18,
+				50, 82, 119, 155, 187, 213, 227);
+
+/*
+ * Reserve VC4_KERNEL_DWORDS words of display-list memory in @space and write
+ * the expanded form of @kernel there: the VC4_LINEAR_PHASE_KERNEL_DWORDS
+ * source dwords in order, followed by the leading ones again in reverse.
+ *
+ * Returns 0 on success or the drm_mm_insert_node() error.
+ */
+static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs,
+					struct drm_mm_node *space,
+					const u32 *kernel)
+{
+	u32 __iomem *dst;
+	int err;
+	int n;
+
+	/*
+	 * NOTE: We don't need a call to drm_dev_enter()/drm_dev_exit()
+	 * here since that function is only called from vc4_hvs_bind().
+	 */
+
+	err = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS);
+	if (err) {
+		drm_err(&hvs->vc4->base, "Failed to allocate space for filter kernel: %d\n",
+			err);
+		return err;
+	}
+
+	dst = hvs->dlist + space->start;
+
+	for (n = 0; n < VC4_KERNEL_DWORDS; n++) {
+		/* Past the stored dwords, walk back towards index 0. */
+		int src = n < VC4_LINEAR_PHASE_KERNEL_DWORDS ?
+			  n : VC4_KERNEL_DWORDS - n - 1;
+
+		writel(kernel[src], &dst[n]);
+	}
+
+	return 0;
+}
+
+/*
+ * Write the CRTC's cached gamma tables (lut_r, lut_g, lut_b) into the HVS
+ * gamma memory slot of the channel assigned to the CRTC.
+ *
+ * Nothing is written when the DRM device can no longer be entered, or on
+ * VC5.
+ */
+static void vc4_hvs_lut_load(struct vc4_hvs *hvs,
+			     struct vc4_crtc *vc4_crtc)
+{
+	struct drm_device *drm = &hvs->vc4->base;
+	struct drm_crtc *crtc = &vc4_crtc->base;
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+	int idx;
+	u32 i;
+
+	if (!drm_dev_enter(drm, &idx))
+		return;
+
+	/*
+	 * vc4_hvs_init_channel() only turns SCALER_DISPBKGND_GAMMA on when
+	 * !is_vc5, so skip the upload on VC5.
+	 * NOTE(review): this replaces an unresolved merge-conflict hunk that
+	 * tested "hvs->vc4->gen == VC4_GEN_4"; is_vc5 is the generation test
+	 * used by the rest of this file. Confirm against the target tree.
+	 */
+	if (hvs->vc4->is_vc5)
+		goto exit;
+
+	/* The LUT memory is laid out with each HVS channel in order,
+	 * each of which takes 256 writes for R, 256 for G, then 256
+	 * for B.
+	 */
+	HVS_WRITE(SCALER_GAMADDR,
+		  SCALER_GAMADDR_AUTOINC |
+		  (vc4_state->assigned_channel * 3 * crtc->gamma_size));
+
+	for (i = 0; i < crtc->gamma_size; i++)
+		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_r[i]);
+	for (i = 0; i < crtc->gamma_size; i++)
+		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_g[i]);
+	for (i = 0; i < crtc->gamma_size; i++)
+		HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_b[i]);
+
+exit:
+	drm_dev_exit(idx);
+}
+
+/*
+ * Convert every entry of the CRTC state's gamma_lut blob to 8 bits per
+ * component, cache the result in the CRTC's lut_r/lut_g/lut_b arrays and
+ * upload it with vc4_hvs_lut_load().
+ */
+static void vc4_hvs_update_gamma_lut(struct vc4_hvs *hvs,
+				     struct vc4_crtc *vc4_crtc)
+{
+	struct drm_crtc_state *state = vc4_crtc->base.state;
+	struct drm_color_lut *entries = state->gamma_lut->data;
+	u32 count = drm_color_lut_size(state->gamma_lut);
+	u32 n;
+
+	for (n = 0; n < count; n++) {
+		struct drm_color_lut *entry = &entries[n];
+
+		vc4_crtc->lut_r[n] = drm_color_lut_extract(entry->red, 8);
+		vc4_crtc->lut_g[n] = drm_color_lut_extract(entry->green, 8);
+		vc4_crtc->lut_b[n] = drm_color_lut_extract(entry->blue, 8);
+	}
+
+	vc4_hvs_lut_load(hvs, vc4_crtc);
+}
+
+/*
+ * Read the frame counter field of FIFO @fifo (0, 1 or 2).
+ *
+ * Returns 0 for any other @fifo value, or when the DRM device can no longer
+ * be entered.
+ */
+u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo)
+{
+	u8 count = 0;
+	int idx;
+
+	if (!drm_dev_enter(&hvs->vc4->base, &idx))
+		return 0;
+
+	if (fifo == 0)
+		count = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
+				      SCALER_DISPSTAT1_FRCNT0);
+	else if (fifo == 1)
+		count = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1),
+				      SCALER_DISPSTAT1_FRCNT1);
+	else if (fifo == 2)
+		count = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2),
+				      SCALER_DISPSTAT2_FRCNT2);
+
+	drm_dev_exit(idx);
+
+	return count;
+}
+
+/*
+ * Translate an HVS output index into the FIFO feeding it.
+ *
+ * When !is_vc5 the output index is returned unchanged. On VC5, outputs 0 and
+ * 1 map to themselves and outputs 2-5 are resolved from their mux register
+ * field.
+ *
+ * Returns the FIFO index, or -EPIPE for an unknown output or when the mux
+ * field of outputs 3-5 reads 3.
+ */
+int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output)
+{
+	struct vc4_dev *vc4 = hvs->vc4;
+	u32 reg;
+	int mux;
+
+	if (!vc4->is_vc5)
+		return output;
+
+	/*
+	 * NOTE: We should probably use drm_dev_enter()/drm_dev_exit()
+	 * here, but this function is only used during the DRM device
+	 * initialization, so we should be fine.
+	 */
+
+	switch (output) {
+	case 0:
+	case 1:
+		return output;
+
+	case 2:
+		reg = HVS_READ(SCALER_DISPECTRL);
+		mux = FIELD_GET(SCALER_DISPECTRL_DSP2_MUX_MASK, reg);
+		return mux == 0 ? 2 : 0;
+
+	case 3:
+		reg = HVS_READ(SCALER_DISPCTRL);
+		mux = FIELD_GET(SCALER_DISPCTRL_DSP3_MUX_MASK, reg);
+		break;
+
+	case 4:
+		reg = HVS_READ(SCALER_DISPEOLN);
+		mux = FIELD_GET(SCALER_DISPEOLN_DSP4_MUX_MASK, reg);
+		break;
+
+	case 5:
+		reg = HVS_READ(SCALER_DISPDITHER);
+		mux = FIELD_GET(SCALER_DISPDITHER_DSP5_MUX_MASK, reg);
+		break;
+
+	default:
+		return -EPIPE;
+	}
+
+	/* Outputs 3-5: a mux field of 3 is reported as an error. */
+	return mux == 3 ? -EPIPE : mux;
+}
+
+/*
+ * Reset and enable the HVS channel assigned to @crtc for @mode.
+ *
+ * Programs the channel's DISPCTRLX register with the mode size and the
+ * oneshot flag, updates the gamma/interlace bits of DISPBKGNDX and reloads
+ * the gamma LUT.
+ *
+ * Returns 0 on success, -ENODEV if the DRM device can no longer be entered.
+ */
+static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc,
+				struct drm_display_mode *mode, bool oneshot)
+{
+	struct vc4_dev *vc4 = hvs->vc4;
+	struct vc4_crtc_state *crtc_state = to_vc4_crtc_state(crtc->state);
+	unsigned int chan = crtc_state->assigned_channel;
+	u32 ctrl = SCALER_DISPCTRLX_ENABLE;
+	u32 bkgnd;
+	int idx;
+
+	if (!drm_dev_enter(&vc4->base, &idx))
+		return -ENODEV;
+
+	HVS_WRITE(SCALER_DISPCTRLX(chan), 0);
+	HVS_WRITE(SCALER_DISPCTRLX(chan), SCALER_DISPCTRLX_RESET);
+	HVS_WRITE(SCALER_DISPCTRLX(chan), 0);
+
+	/* Turn on the scaler, which will wait for vstart to start
+	 * compositing.
+	 * When feeding the transposer, we should operate in oneshot
+	 * mode.
+	 */
+	bkgnd = HVS_READ(SCALER_DISPBKGNDX(chan));
+
+	if (vc4->is_vc5) {
+		ctrl |= VC4_SET_FIELD(mode->hdisplay,
+				      SCALER5_DISPCTRLX_WIDTH) |
+			VC4_SET_FIELD(mode->vdisplay,
+				      SCALER5_DISPCTRLX_HEIGHT);
+		if (oneshot)
+			ctrl |= SCALER5_DISPCTRLX_ONESHOT;
+		bkgnd &= ~SCALER5_DISPBKGND_BCK2BCK;
+	} else {
+		ctrl |= VC4_SET_FIELD(mode->hdisplay,
+				      SCALER_DISPCTRLX_WIDTH) |
+			VC4_SET_FIELD(mode->vdisplay,
+				      SCALER_DISPCTRLX_HEIGHT);
+		if (oneshot)
+			ctrl |= SCALER_DISPCTRLX_ONESHOT;
+		bkgnd |= SCALER_DISPBKGND_AUTOHS;
+	}
+
+	HVS_WRITE(SCALER_DISPCTRLX(chan), ctrl);
+
+	/* Recompute the gamma and interlace bits from scratch. */
+	bkgnd &= ~(SCALER_DISPBKGND_GAMMA | SCALER_DISPBKGND_INTERLACE);
+	if (!vc4->is_vc5)
+		bkgnd |= SCALER_DISPBKGND_GAMMA;
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		bkgnd |= SCALER_DISPBKGND_INTERLACE;
+
+	HVS_WRITE(SCALER_DISPBKGNDX(chan), bkgnd);
+
+	/* Reload the LUT, since the SRAMs would have been disabled if
+	 * all CRTCs had SCALER_DISPBKGND_GAMMA unset at once.
+	 */
+	vc4_hvs_lut_load(hvs, to_vc4_crtc(crtc));
+
+	drm_dev_exit(idx);
+
+	return 0;
+}
+
+/*
+ * Stop HVS channel @chan.
+ *
+ * Does nothing if the DRM device can no longer be entered or if the channel
+ * is not enabled. Otherwise the reset bit is set, the enable bit is cleared,
+ * and the resulting hardware state is sanity-checked.
+ */
+void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan)
+{
+	struct drm_device *drm = &hvs->vc4->base;
+	int idx;
+
+	if (!drm_dev_enter(drm, &idx))
+		return;
+
+	/*
+	 * Only an enabled channel needs stopping. The test used to be
+	 * inverted, which skipped the stop sequence exactly when the channel
+	 * was running.
+	 */
+	if (!(HVS_READ(SCALER_DISPCTRLX(chan)) & SCALER_DISPCTRLX_ENABLE))
+		goto out;
+
+	/*
+	 * NOTE(review): unlike vc4_hvs_init_channel(), the RESET bit is never
+	 * written back to 0 here; confirm it is self-clearing, otherwise the
+	 * first WARN_ON_ONCE() below will trigger.
+	 */
+	HVS_WRITE(SCALER_DISPCTRLX(chan),
+		  HVS_READ(SCALER_DISPCTRLX(chan)) | SCALER_DISPCTRLX_RESET);
+	HVS_WRITE(SCALER_DISPCTRLX(chan),
+		  HVS_READ(SCALER_DISPCTRLX(chan)) & ~SCALER_DISPCTRLX_ENABLE);
+
+	/* Once we leave, the scaler should be disabled and its fifo empty. */
+	WARN_ON_ONCE(HVS_READ(SCALER_DISPCTRLX(chan)) & SCALER_DISPCTRLX_RESET);
+
+	WARN_ON_ONCE(VC4_GET_FIELD(HVS_READ(SCALER_DISPSTATX(chan)),
+				   SCALER_DISPSTATX_MODE) !=
+		     SCALER_DISPSTATX_MODE_DISABLED);
+
+	WARN_ON_ONCE((HVS_READ(SCALER_DISPSTATX(chan)) &
+		      (SCALER_DISPSTATX_FULL | SCALER_DISPSTATX_EMPTY)) !=
+		     SCALER_DISPSTATX_EMPTY);
+
+out:
+	drm_dev_exit(idx);
+}
+
+/*
+ * Atomic check for the HVS side of a CRTC.
+ *
+ * Rejects states with more than one connector on the CRTC, then reserves
+ * display-list space for all of the CRTC's planes plus one terminating word.
+ *
+ * Returns 0 on success, -EINVAL for too many connectors, or the
+ * drm_mm_insert_node() error.
+ */
+int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
+{
+	struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc);
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state);
+	struct vc4_dev *vc4 = to_vc4_dev(crtc->dev);
+	struct vc4_hvs *hvs = vc4->hvs;
+	const struct drm_plane_state *plane_state;
+	struct drm_plane *plane;
+	unsigned long irqflags;
+	u32 words = 0;
+	int err;
+
+	/* The pixelvalve can only feed one encoder (and encoders are
+	 * 1:1 with connectors.)
+	 */
+	if (hweight32(crtc_state->connector_mask) > 1)
+		return -EINVAL;
+
+	drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state)
+		words += vc4_plane_dlist_size(plane_state);
+
+	/* Account for SCALER_CTL0_END. */
+	words++;
+
+	spin_lock_irqsave(&hvs->mm_lock, irqflags);
+	err = drm_mm_insert_node(&hvs->dlist_mm, &vc4_state->mm, words);
+	spin_unlock_irqrestore(&hvs->mm_lock, irqflags);
+
+	return err;
+}
+
+/*
+ * Write the start offset of the CRTC state's display-list allocation into
+ * the DISPLISTX register of the channel assigned to @crtc.
+ */
+static void vc4_hvs_install_dlist(struct drm_crtc *crtc)
+{
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+	struct vc4_dev *vc4 = to_vc4_dev(crtc->dev);
+	struct vc4_hvs *hvs = vc4->hvs;
+	int idx;
+
+	if (!drm_dev_enter(crtc->dev, &idx))
+		return;
+
+	HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel),
+		  vc4_state->mm.start);
+
+	drm_dev_exit(idx);
+}
+
+/*
+ * Hand any pending event of the CRTC state over to the vc4 CRTC and record
+ * the start of the state's display list as the CRTC's current dlist.
+ */
+static void vc4_hvs_update_dlist(struct drm_crtc *crtc)
+{
+	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+	struct drm_crtc_state *state = crtc->state;
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
+	struct drm_device *dev = crtc->dev;
+	unsigned long irqflags;
+
+	if (state->event) {
+		state->event->pipe = drm_crtc_index(crtc);
+
+		WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
+		spin_lock_irqsave(&dev->event_lock, irqflags);
+
+		/* When feeding the TXP, only take the event once it is armed. */
+		if (!vc4_crtc->feeds_txp || vc4_state->txp_armed) {
+			vc4_crtc->event = state->event;
+			state->event = NULL;
+		}
+
+		spin_unlock_irqrestore(&dev->event_lock, irqflags);
+	}
+
+	spin_lock_irqsave(&vc4_crtc->irq_lock, irqflags);
+	vc4_crtc->current_dlist = vc4_state->mm.start;
+	spin_unlock_irqrestore(&vc4_crtc->irq_lock, irqflags);
+}
+
+/*
+ * Record, under the CRTC's irq_lock, the HVS channel assigned to the CRTC's
+ * current state as the CRTC's current HVS channel. @state is unused.
+ */
+void vc4_hvs_atomic_begin(struct drm_crtc *crtc,
+			  struct drm_atomic_state *state)
+{
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&vc4_crtc->irq_lock, irqflags);
+	vc4_crtc->current_hvs_channel = vc4_state->assigned_channel;
+	spin_unlock_irqrestore(&vc4_crtc->irq_lock, irqflags);
+}
+
+/*
+ * Install the CRTC's display list, then initialise its HVS channel for the
+ * adjusted mode. Oneshot mode is used when the CRTC feeds the TXP.
+ * @state is unused.
+ */
+void vc4_hvs_atomic_enable(struct drm_crtc *crtc,
+			   struct drm_atomic_state *state)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(crtc->dev);
+	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+
+	vc4_hvs_install_dlist(crtc);
+	vc4_hvs_update_dlist(crtc);
+	vc4_hvs_init_channel(vc4->hvs, crtc, &crtc->state->adjusted_mode,
+			     vc4_crtc->feeds_txp);
+}
+
+/* Stop the HVS channel that was assigned to @crtc in its old state. */
+void vc4_hvs_atomic_disable(struct drm_crtc *crtc,
+			    struct drm_atomic_state *state)
+{
+	struct drm_crtc_state *old_crtc_state =
+		drm_atomic_get_old_crtc_state(state, crtc);
+	struct vc4_crtc_state *old_vc4_state =
+		to_vc4_crtc_state(old_crtc_state);
+	struct vc4_dev *vc4 = to_vc4_dev(crtc->dev);
+
+	vc4_hvs_stop_channel(vc4->hvs, old_vc4_state->assigned_channel);
+}
+
+/*
+ * Write the display list for the CRTC's new state into HVS dlist memory and
+ * apply the related channel settings (background fill, DISPLIST pointer and
+ * gamma enable).
+ *
+ * If the DRM device can no longer be entered, only the vblank event is sent.
+ * If the state has no HVS channel assigned, nothing is written.
+ */
+void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
+			  struct drm_atomic_state *state)
+{
+	struct drm_crtc_state *old_state = drm_atomic_get_old_crtc_state(state,
+									 crtc);
+	struct drm_device *dev = crtc->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_hvs *hvs = vc4->hvs;
+	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+	unsigned int channel = vc4_state->assigned_channel;
+	struct drm_plane *plane;
+	struct vc4_plane_state *vc4_plane_state;
+	bool debug_dump_regs = false;
+	bool enable_bg_fill = false;
+	u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start;
+	u32 __iomem *dlist_next = dlist_start;
+	unsigned int zpos = 0;
+	bool found = false;
+	int idx;
+
+	if (!drm_dev_enter(dev, &idx)) {
+		vc4_crtc_send_vblank(crtc);
+		return;
+	}
+
+	/*
+	 * Leave through the common exit so that the drm_dev_enter() above is
+	 * always paired with drm_dev_exit().
+	 */
+	if (vc4_state->assigned_channel == VC4_HVS_CHANNEL_DISABLED)
+		goto exit;
+
+	if (debug_dump_regs) {
+		DRM_INFO("CRTC %d HVS before:\n", drm_crtc_index(crtc));
+		vc4_hvs_dump_state(hvs);
+	}
+
+	/* Copy all the active planes' dlist contents to the hardware dlist. */
+	do {
+		found = false;
+
+		drm_atomic_crtc_for_each_plane(plane, crtc) {
+			if (plane->state->normalized_zpos != zpos)
+				continue;
+
+			/* Is this the first active plane? */
+			if (dlist_next == dlist_start) {
+				/* We need to enable background fill when a plane
+				 * could be alpha blending from the background, i.e.
+				 * where no other plane is underneath. It suffices to
+				 * consider the first active plane here since we set
+				 * needs_bg_fill such that either the first plane
+				 * already needs it or all planes on top blend from
+				 * the first or a lower plane.
+				 */
+				vc4_plane_state = to_vc4_plane_state(plane->state);
+				enable_bg_fill = vc4_plane_state->needs_bg_fill;
+			}
+
+			dlist_next += vc4_plane_write_dlist(plane, dlist_next);
+
+			found = true;
+		}
+
+		zpos++;
+	} while (found);
+
+	writel(SCALER_CTL0_END, dlist_next);
+	dlist_next++;
+
+	WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size);
+
+	if (enable_bg_fill) {
+		/* This sets a black background color fill, as is the case
+		 * with other DRM drivers.
+		 */
+		HVS_WRITE(SCALER_DISPBKGNDX(channel),
+			  HVS_READ(SCALER_DISPBKGNDX(channel)) |
+			  SCALER_DISPBKGND_FILL);
+	}
+
+	/* Only update DISPLIST if the CRTC was already running and is not
+	 * being disabled.
+	 * vc4_crtc_enable() takes care of updating the dlist just after
+	 * re-enabling VBLANK interrupts and before enabling the engine.
+	 * If the CRTC is being disabled, there's no point in updating this
+	 * information.
+	 */
+	if (crtc->state->active && old_state->active) {
+		vc4_hvs_install_dlist(crtc);
+		vc4_hvs_update_dlist(crtc);
+	}
+
+	if (crtc->state->color_mgmt_changed) {
+		u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(channel));
+
+		if (crtc->state->gamma_lut) {
+			vc4_hvs_update_gamma_lut(hvs, vc4_crtc);
+			dispbkgndx |= SCALER_DISPBKGND_GAMMA;
+		} else {
+			/* Unsetting DISPBKGND_GAMMA skips the gamma lut step
+			 * in hardware, which is the same as a linear lut that
+			 * DRM expects us to use in absence of a user lut.
+			 */
+			dispbkgndx &= ~SCALER_DISPBKGND_GAMMA;
+		}
+		HVS_WRITE(SCALER_DISPBKGNDX(channel), dispbkgndx);
+	}
+
+	if (debug_dump_regs) {
+		DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
+		vc4_hvs_dump_state(hvs);
+	}
+
+exit:
+	drm_dev_exit(idx);
+}
+
+/*
+ * Clear the DSPEISLUR bit of @channel in SCALER_DISPCTRL, using the VC5 bit
+ * layout when is_vc5 is set.
+ */
+void vc4_hvs_mask_underrun(struct vc4_hvs *hvs, int channel)
+{
+	u32 underrun_bit;
+	u32 ctrl;
+	int idx;
+
+	if (!drm_dev_enter(&hvs->vc4->base, &idx))
+		return;
+
+	ctrl = HVS_READ(SCALER_DISPCTRL);
+
+	if (hvs->vc4->is_vc5)
+		underrun_bit = SCALER5_DISPCTRL_DSPEISLUR(channel);
+	else
+		underrun_bit = SCALER_DISPCTRL_DSPEISLUR(channel);
+
+	HVS_WRITE(SCALER_DISPCTRL, ctrl & ~underrun_bit);
+
+	drm_dev_exit(idx);
+}
+
+/*
+ * Write the EUFLOW bit of @channel to SCALER_DISPSTAT, then set the
+ * channel's DSPEISLUR bit in SCALER_DISPCTRL (VC5 bit layout when is_vc5
+ * is set).
+ */
+void vc4_hvs_unmask_underrun(struct vc4_hvs *hvs, int channel)
+{
+	u32 underrun_bit;
+	u32 ctrl;
+	int idx;
+
+	if (!drm_dev_enter(&hvs->vc4->base, &idx))
+		return;
+
+	ctrl = HVS_READ(SCALER_DISPCTRL);
+
+	if (hvs->vc4->is_vc5)
+		underrun_bit = SCALER5_DISPCTRL_DSPEISLUR(channel);
+	else
+		underrun_bit = SCALER_DISPCTRL_DSPEISLUR(channel);
+
+	/* The status write must precede the control write. */
+	HVS_WRITE(SCALER_DISPSTAT, SCALER_DISPSTAT_EUFLOW(channel));
+	HVS_WRITE(SCALER_DISPCTRL, ctrl | underrun_bit);
+
+	drm_dev_exit(idx);
+}
+
+static void vc4_hvs_report_underrun(struct drm_device *dev)
+{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ atomic_inc(&vc4->underrun);
+ DRM_DEV_ERROR(dev->dev, "HVS underrun\n");
+}
+
+/*
+ * HVS interrupt handler.
+ *
+ * For every channel whose underrun status bit and underrun interrupt enable
+ * bit are both set, the underrun interrupt is masked and the underrun is
+ * reported. The per-channel interrupt flags are then cleared.
+ *
+ * Returns IRQ_HANDLED if at least one underrun was handled, else IRQ_NONE.
+ */
+static irqreturn_t vc4_hvs_irq_handler(int irq, void *data)
+{
+	struct drm_device *dev = data;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_hvs *hvs = vc4->hvs;
+	irqreturn_t result = IRQ_NONE;
+	u32 status, control;
+	int chan;
+
+	/*
+	 * NOTE: We don't need to protect the register access using
+	 * drm_dev_enter() there because the interrupt handler lifetime
+	 * is tied to the device itself, and not to the DRM device.
+	 *
+	 * So when the device will be gone, one of the first thing we
+	 * will be doing will be to unregister the interrupt handler,
+	 * and then unregister the DRM device. drm_dev_enter() would
+	 * thus always succeed if we are here.
+	 */
+
+	status = HVS_READ(SCALER_DISPSTAT);
+	control = HVS_READ(SCALER_DISPCTRL);
+
+	for (chan = 0; chan < SCALER_CHANNELS_COUNT; chan++) {
+		u32 underrun_irq;
+
+		if (vc4->is_vc5)
+			underrun_irq = SCALER5_DISPCTRL_DSPEISLUR(chan);
+		else
+			underrun_irq = SCALER_DISPCTRL_DSPEISLUR(chan);
+
+		if (!(status & SCALER_DISPSTAT_EUFLOW(chan)))
+			continue;
+
+		/* Interrupt masking is not always honored, so check it here. */
+		if (!(control & underrun_irq))
+			continue;
+
+		vc4_hvs_mask_underrun(hvs, chan);
+		vc4_hvs_report_underrun(dev);
+
+		result = IRQ_HANDLED;
+	}
+
+	/* Clear every per-channel interrupt flag. */
+	HVS_WRITE(SCALER_DISPSTAT, SCALER_DISPSTAT_IRQMASK(0) |
+				   SCALER_DISPSTAT_IRQMASK(1) |
+				   SCALER_DISPSTAT_IRQMASK(2));
+
+	return result;
+}
+
+/*
+ * Register the HVS debugfs entries: "hvs_load_tracker" (only when !is_vc5),
+ * "hvs_dlists", "hvs_underrun" and the "hvs_regs" register set.
+ *
+ * Returns 0 on success, -ENODEV if no HVS is attached to the device.
+ */
+int vc4_hvs_debugfs_init(struct drm_minor *minor)
+{
+	struct drm_device *drm = minor->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(drm);
+	struct vc4_hvs *hvs = vc4->hvs;
+
+	if (!hvs)
+		return -ENODEV;
+
+	if (!vc4->is_vc5) {
+		debugfs_create_bool("hvs_load_tracker", S_IRUGO | S_IWUSR,
+				    minor->debugfs_root,
+				    &vc4->load_tracker_enabled);
+	}
+
+	drm_debugfs_add_file(drm, "hvs_dlists", vc4_hvs_debugfs_dlist, NULL);
+	drm_debugfs_add_file(drm, "hvs_underrun", vc4_hvs_debugfs_underrun, NULL);
+	vc4_debugfs_add_regset32(drm, "hvs_regs", &hvs->regset);
+
+	return 0;
+}
+
+/*
+ * Allocate a DRM-managed vc4_hvs, initialise its lock and its dlist and LBM
+ * range allocators, and publish it as vc4->hvs.
+ *
+ * Returns the new object or ERR_PTR(-ENOMEM).
+ */
+struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4, struct platform_device *pdev)
+{
+	struct vc4_hvs *hvs;
+
+	hvs = drmm_kzalloc(&vc4->base, sizeof(*hvs), GFP_KERNEL);
+	if (!hvs)
+		return ERR_PTR(-ENOMEM);
+
+	hvs->vc4 = vc4;
+	hvs->pdev = pdev;
+
+	spin_lock_init(&hvs->mm_lock);
+
+	/* Set up the HVS display list memory manager.  We never
+	 * overwrite the setup from the bootloader (just 128b out of
+	 * our 16K), since we don't want to scramble the screen when
+	 * transitioning from the firmware's boot setup to runtime.
+	 */
+	drm_mm_init(&hvs->dlist_mm,
+		    HVS_BOOTLOADER_DLIST_END,
+		    (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END);
+
+	/* Set up the HVS LBM memory manager.  We could have some more
+	 * complicated data structure that allowed reuse of LBM areas
+	 * between planes when they don't overlap on the screen, but
+	 * for now we just allocate globally.
+	 */
+	if (vc4->is_vc5) {
+		/* 60k words of 4x12-bit pixels */
+		drm_mm_init(&hvs->lbm_mm, 0, 60 * 1024);
+	} else {
+		/* 48k words of 2x12-bit pixels */
+		drm_mm_init(&hvs->lbm_mm, 0, 48 * 1024);
+	}
+
+	vc4->hvs = hvs;
+
+	return hvs;
+}
+
+/*
+ * Component bind callback for the HVS.
+ *
+ * Allocates the HVS object, maps its registers, enables the core clock on
+ * VC5, uploads the scaling filter kernel, programs the output muxes,
+ * interrupt enables, AXI panic levels and COB allocations, and finally
+ * requests the HVS interrupt.
+ *
+ * Returns 0 on success or a negative error code. The core clock enabled
+ * here is disabled again if a later step fails.
+ */
+static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct drm_device *drm = dev_get_drvdata(master);
+	struct vc4_dev *vc4 = to_vc4_dev(drm);
+	struct vc4_hvs *hvs = NULL;
+	int ret;
+	u32 dispctrl;
+	u32 reg, top;
+
+	/* vc4_hvs_dump_state() dereferences hvs->pdev, so it must be set. */
+	hvs = __vc4_hvs_alloc(vc4, pdev);
+	if (IS_ERR(hvs))
+		return PTR_ERR(hvs);
+
+	hvs->regs = vc4_ioremap_regs(pdev, 0);
+	if (IS_ERR(hvs->regs))
+		return PTR_ERR(hvs->regs);
+
+	hvs->regset.base = hvs->regs;
+	hvs->regset.regs = hvs_regs;
+	hvs->regset.nregs = ARRAY_SIZE(hvs_regs);
+
+	if (vc4->is_vc5) {
+		struct rpi_firmware *firmware;
+		struct device_node *node;
+		unsigned int max_rate;
+
+		node = rpi_firmware_find_node();
+		if (!node)
+			return -EINVAL;
+
+		firmware = rpi_firmware_get(node);
+		of_node_put(node);
+		if (!firmware)
+			return -EPROBE_DEFER;
+
+		hvs->core_clk = devm_clk_get(&pdev->dev, NULL);
+		if (IS_ERR(hvs->core_clk)) {
+			dev_err(&pdev->dev, "Couldn't get core clock\n");
+			/* Don't leak the firmware reference taken above. */
+			rpi_firmware_put(firmware);
+			return PTR_ERR(hvs->core_clk);
+		}
+
+		max_rate = rpi_firmware_clk_get_max_rate(firmware,
+							 RPI_FIRMWARE_CORE_CLK_ID);
+		rpi_firmware_put(firmware);
+		if (max_rate >= 550000000)
+			hvs->vc5_hdmi_enable_hdmi_20 = true;
+
+		if (max_rate >= 600000000)
+			hvs->vc5_hdmi_enable_4096by2160 = true;
+
+		hvs->max_core_rate = max_rate;
+
+		ret = clk_prepare_enable(hvs->core_clk);
+		if (ret) {
+			dev_err(&pdev->dev, "Couldn't enable the core clock\n");
+			return ret;
+		}
+	}
+
+	if (!vc4->is_vc5)
+		hvs->dlist = hvs->regs + SCALER_DLIST_START;
+	else
+		hvs->dlist = hvs->regs + SCALER5_DLIST_START;
+
+	/* Upload filter kernels.  We only have the one for now, so we
+	 * keep it around for the lifetime of the driver.
+	 */
+	ret = vc4_hvs_upload_linear_kernel(hvs,
+					   &hvs->mitchell_netravali_filter,
+					   mitchell_netravali_1_3_1_3_kernel);
+	if (ret)
+		goto err_disable_clk;
+
+	reg = HVS_READ(SCALER_DISPECTRL);
+	reg &= ~SCALER_DISPECTRL_DSP2_MUX_MASK;
+	HVS_WRITE(SCALER_DISPECTRL,
+		  reg | VC4_SET_FIELD(0, SCALER_DISPECTRL_DSP2_MUX));
+
+	reg = HVS_READ(SCALER_DISPCTRL);
+	reg &= ~SCALER_DISPCTRL_DSP3_MUX_MASK;
+	HVS_WRITE(SCALER_DISPCTRL,
+		  reg | VC4_SET_FIELD(3, SCALER_DISPCTRL_DSP3_MUX));
+
+	reg = HVS_READ(SCALER_DISPEOLN);
+	reg &= ~SCALER_DISPEOLN_DSP4_MUX_MASK;
+	HVS_WRITE(SCALER_DISPEOLN,
+		  reg | VC4_SET_FIELD(3, SCALER_DISPEOLN_DSP4_MUX));
+
+	reg = HVS_READ(SCALER_DISPDITHER);
+	reg &= ~SCALER_DISPDITHER_DSP5_MUX_MASK;
+	HVS_WRITE(SCALER_DISPDITHER,
+		  reg | VC4_SET_FIELD(3, SCALER_DISPDITHER_DSP5_MUX));
+
+	dispctrl = HVS_READ(SCALER_DISPCTRL);
+
+	dispctrl |= SCALER_DISPCTRL_ENABLE;
+	dispctrl |= SCALER_DISPCTRL_DISPEIRQ(0) |
+		    SCALER_DISPCTRL_DISPEIRQ(1) |
+		    SCALER_DISPCTRL_DISPEIRQ(2);
+
+	if (!vc4->is_vc5)
+		dispctrl &= ~(SCALER_DISPCTRL_DMAEIRQ |
+			      SCALER_DISPCTRL_SLVWREIRQ |
+			      SCALER_DISPCTRL_SLVRDEIRQ |
+			      SCALER_DISPCTRL_DSPEIEOF(0) |
+			      SCALER_DISPCTRL_DSPEIEOF(1) |
+			      SCALER_DISPCTRL_DSPEIEOF(2) |
+			      SCALER_DISPCTRL_DSPEIEOLN(0) |
+			      SCALER_DISPCTRL_DSPEIEOLN(1) |
+			      SCALER_DISPCTRL_DSPEIEOLN(2) |
+			      SCALER_DISPCTRL_DSPEISLUR(0) |
+			      SCALER_DISPCTRL_DSPEISLUR(1) |
+			      SCALER_DISPCTRL_DSPEISLUR(2) |
+			      SCALER_DISPCTRL_SCLEIRQ);
+	else
+		dispctrl &= ~(SCALER_DISPCTRL_DMAEIRQ |
+			      SCALER5_DISPCTRL_SLVEIRQ |
+			      SCALER5_DISPCTRL_DSPEIEOF(0) |
+			      SCALER5_DISPCTRL_DSPEIEOF(1) |
+			      SCALER5_DISPCTRL_DSPEIEOF(2) |
+			      SCALER5_DISPCTRL_DSPEIEOLN(0) |
+			      SCALER5_DISPCTRL_DSPEIEOLN(1) |
+			      SCALER5_DISPCTRL_DSPEIEOLN(2) |
+			      SCALER5_DISPCTRL_DSPEISLUR(0) |
+			      SCALER5_DISPCTRL_DSPEISLUR(1) |
+			      SCALER5_DISPCTRL_DSPEISLUR(2) |
+			      SCALER_DISPCTRL_SCLEIRQ);
+
+
+	/* Set AXI panic mode.
+	 * VC4 panics when < 2 lines in FIFO.
+	 * VC5 panics when less than 1 line in the FIFO.
+	 */
+	dispctrl &= ~(SCALER_DISPCTRL_PANIC0_MASK |
+		      SCALER_DISPCTRL_PANIC1_MASK |
+		      SCALER_DISPCTRL_PANIC2_MASK);
+	dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC0);
+	dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC1);
+	dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC2);
+
+	HVS_WRITE(SCALER_DISPCTRL, dispctrl);
+
+	/* Recompute Composite Output Buffer (COB) allocations for the displays
+	 */
+	if (!vc4->is_vc5) {
+		/* The COB is 20736 pixels, or just over 10 lines at 2048 wide.
+		 * The bottom 2048 pixels are full 32bpp RGBA (intended for the
+		 * TXP composing RGBA to memory), whilst the remainder are only
+		 * 24bpp RGB.
+		 *
+		 * Assign 3 lines to channels 1 & 2, and just over 4 lines to
+		 * channel 0.
+		 */
+		#define VC4_COB_SIZE		20736
+		#define VC4_COB_LINE_WIDTH	2048
+		#define VC4_COB_NUM_LINES	3
+		reg = 0;
+		top = VC4_COB_LINE_WIDTH * VC4_COB_NUM_LINES;
+		reg |= (top - 1) << 16;
+		HVS_WRITE(SCALER_DISPBASE2, reg);
+		reg = top;
+		top += VC4_COB_LINE_WIDTH * VC4_COB_NUM_LINES;
+		reg |= (top - 1) << 16;
+		HVS_WRITE(SCALER_DISPBASE1, reg);
+		reg = top;
+		top = VC4_COB_SIZE;
+		reg |= (top - 1) << 16;
+		HVS_WRITE(SCALER_DISPBASE0, reg);
+	} else {
+		/* The COB is 44416 pixels, or 10.8 lines at 4096 wide.
+		 * The bottom 4096 pixels are full RGBA (intended for the TXP
+		 * composing RGBA to memory), whilst the remainder are only
+		 * RGB. Addressing is always pixel wide.
+		 *
+		 * Assign 3 lines of 4096 to channels 1 & 2, and just over 4
+		 * lines to channel 0.
+		 */
+		#define VC5_COB_SIZE		44416
+		#define VC5_COB_LINE_WIDTH	4096
+		#define VC5_COB_NUM_LINES	3
+		reg = 0;
+		top = VC5_COB_LINE_WIDTH * VC5_COB_NUM_LINES;
+		reg |= top << 16;
+		HVS_WRITE(SCALER_DISPBASE2, reg);
+		top += 16;
+		reg = top;
+		top += VC5_COB_LINE_WIDTH * VC5_COB_NUM_LINES;
+		reg |= top << 16;
+		HVS_WRITE(SCALER_DISPBASE1, reg);
+		top += 16;
+		reg = top;
+		top = VC5_COB_SIZE;
+		reg |= top << 16;
+		HVS_WRITE(SCALER_DISPBASE0, reg);
+	}
+
+	ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
+			       vc4_hvs_irq_handler, 0, "vc4 hvs", drm);
+	if (ret)
+		goto err_disable_clk;
+
+	return 0;
+
+err_disable_clk:
+	/*
+	 * hvs->core_clk is only set on VC5; vc4_hvs_unbind() makes the same
+	 * unconditional call.
+	 */
+	clk_disable_unprepare(hvs->core_clk);
+	return ret;
+}
+
+/*
+ * Component unbind callback: release every node of the dlist and LBM range
+ * allocators, tear both allocators down, disable the core clock and detach
+ * the HVS from the vc4 device.
+ */
+static void vc4_hvs_unbind(struct device *dev, struct device *master,
+			   void *data)
+{
+	struct drm_device *drm = dev_get_drvdata(master);
+	struct vc4_dev *vc4 = to_vc4_dev(drm);
+	struct vc4_hvs *hvs = vc4->hvs;
+	struct drm_mm_node *cur, *tmp;
+
+	if (drm_mm_node_allocated(&hvs->mitchell_netravali_filter))
+		drm_mm_remove_node(&hvs->mitchell_netravali_filter);
+
+	drm_mm_for_each_node_safe(cur, tmp, &hvs->dlist_mm)
+		drm_mm_remove_node(cur);
+	drm_mm_takedown(&hvs->dlist_mm);
+
+	drm_mm_for_each_node_safe(cur, tmp, &hvs->lbm_mm)
+		drm_mm_remove_node(cur);
+	drm_mm_takedown(&hvs->lbm_mm);
+
+	clk_disable_unprepare(hvs->core_clk);
+
+	vc4->hvs = NULL;
+}
+
+/* Component callbacks registered by the platform driver's probe. */
+static const struct component_ops vc4_hvs_ops = {
+	.bind   = vc4_hvs_bind,
+	.unbind = vc4_hvs_unbind,
+};
+
+/* Platform probe: register the HVS as a component of the vc4 device. */
+static int vc4_hvs_dev_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	return component_add(dev, &vc4_hvs_ops);
+}
+
+/* Platform remove: unregister the HVS component. */
+static void vc4_hvs_dev_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	component_del(dev, &vc4_hvs_ops);
+}
+
+/* Device-tree compatibles handled by this driver. */
+static const struct of_device_id vc4_hvs_dt_match[] = {
+	{ .compatible = "brcm,bcm2711-hvs" },
+	{ .compatible = "brcm,bcm2835-hvs" },
+	{}
+};
+
+struct platform_driver vc4_hvs_driver = {
+	.probe = vc4_hvs_dev_probe,
+	.remove_new = vc4_hvs_dev_remove,
+	.driver = {
+		.name = "vc4_hvs",
+		.of_match_table = vc4_hvs_dt_match,
+	},
+};