diff options
author | Thomas Zimmermann <tzimmermann@suse.de> | 2024-10-25 15:43:37 +0200 |
---|---|---|
committer | Thomas Zimmermann <tzimmermann@suse.de> | 2024-10-25 15:43:37 +0200 |
commit | 12837c251571546470056761ee0c7b944c89edc3 (patch) | |
tree | 1bc9f3933ad713077636bae8522f41f43078c861 | |
parent | 8ccf06f174811c962af31157c247cffbd9317f0a (diff) |
2024y-10m-25d-13h-42m-32s UTC: drm-tip rerere cache update
git version 2.46.0
5 files changed, 1089 insertions, 7132 deletions
diff --git a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/postimage.2 b/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/postimage.2 deleted file mode 100644 index aec7db39c061..000000000000 --- a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/postimage.2 +++ /dev/null @@ -1,1780 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include "xe_lrc.h" - -#include <generated/xe_wa_oob.h> - -#include <linux/ascii85.h> - -#include "instructions/xe_mi_commands.h" -#include "instructions/xe_gfxpipe_commands.h" -#include "instructions/xe_gfx_state_commands.h" -#include "regs/xe_engine_regs.h" -#include "regs/xe_lrc_layout.h" -#include "xe_bb.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_exec_queue_types.h" -#include "xe_gt.h" -#include "xe_gt_printk.h" -#include "xe_hw_fence.h" -#include "xe_map.h" -#include "xe_memirq.h" -#include "xe_sriov.h" -#include "xe_vm.h" -#include "xe_wa.h" - -#define LRC_VALID BIT_ULL(0) -#define LRC_PRIVILEGE BIT_ULL(8) -#define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3) -#define LRC_LEGACY_64B_CONTEXT 3 - -#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) -#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) - -#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K - -struct xe_lrc_snapshot { - struct xe_bo *lrc_bo; - void *lrc_snapshot; - unsigned long lrc_size, lrc_offset; - - u32 context_desc; - u32 indirect_context_desc; - u32 head; - struct { - u32 internal; - u32 memory; - } tail; - u32 start_seqno; - u32 seqno; - u32 ctx_timestamp; - u32 ctx_job_timestamp; -}; - -static struct xe_device * -lrc_to_xe(struct xe_lrc *lrc) -{ - return gt_to_xe(lrc->fence_ctx.gt); -} - -size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) -{ - struct xe_device *xe = gt_to_xe(gt); - size_t size; - - switch (class) { - case XE_ENGINE_CLASS_RENDER: - if (GRAPHICS_VER(xe) >= 20) - size = 4 * SZ_4K; - else - size = 14 * SZ_4K; - break; - case XE_ENGINE_CLASS_COMPUTE: - /* 14 pages 
since graphics_ver == 11 */ - if (GRAPHICS_VER(xe) >= 20) - size = 3 * SZ_4K; - else - size = 14 * SZ_4K; - break; - default: - WARN(1, "Unknown engine class: %d", class); - fallthrough; - case XE_ENGINE_CLASS_COPY: - case XE_ENGINE_CLASS_VIDEO_DECODE: - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - case XE_ENGINE_CLASS_OTHER: - size = 2 * SZ_4K; - } - - /* Add indirect ring state page */ - if (xe_gt_has_indirect_ring_state(gt)) - size += LRC_INDIRECT_RING_STATE_SIZE; - - return size; -} - -/* - * The per-platform tables are u8-encoded in @data. Decode @data and set the - * addresses' offset and commands in @regs. The following encoding is used - * for each byte. There are 2 steps: decoding commands and decoding addresses. - * - * Commands: - * [7]: create NOPs - number of NOPs are set in lower bits - * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set - * MI_LRI_FORCE_POSTED - * [5:0]: Number of NOPs or registers to set values to in case of - * MI_LOAD_REGISTER_IMM - * - * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count" - * number of registers. They are set by using the REG/REG16 macros: the former - * is used for offsets smaller than 0x200 while the latter is for values bigger - * than that. Those macros already set all the bits documented below correctly: - * - * [7]: When a register offset needs more than 6 bits, use additional bytes, to - * follow, for the lower bits - * [6:0]: Register offset, without considering the engine base. - * - * This function only tweaks the commands and register offsets. Values are not - * filled out. 
- */ -static void set_offsets(u32 *regs, - const u8 *data, - const struct xe_hw_engine *hwe) -#define NOP(x) (BIT(7) | (x)) -#define LRI(count, flags) ((flags) << 6 | (count) | \ - BUILD_BUG_ON_ZERO(count >= BIT(6))) -#define POSTED BIT(0) -#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) -#define REG16(x) \ - (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ - (((x) >> 2) & 0x7f) -{ - const u32 base = hwe->mmio_base; - - while (*data) { - u8 count, flags; - - if (*data & BIT(7)) { /* skip */ - count = *data++ & ~BIT(7); - regs += count; - continue; - } - - count = *data & 0x3f; - flags = *data >> 6; - data++; - - *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); - if (flags & POSTED) - *regs |= MI_LRI_FORCE_POSTED; - *regs |= MI_LRI_LRM_CS_MMIO; - regs++; - - xe_gt_assert(hwe->gt, count); - do { - u32 offset = 0; - u8 v; - - do { - v = *data++; - offset <<= 7; - offset |= v & ~BIT(7); - } while (v & BIT(7)); - - regs[0] = base + (offset << 2); - regs += 2; - } while (--count); - } - - *regs = MI_BATCH_BUFFER_END | BIT(0); -} - -static const u8 gen12_xcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - 0 -}; - -static const u8 dg2_xcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - 0 -}; - -static const u8 
gen12_rcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - NOP(3 + 9 + 1), - - LRI(51, POSTED), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG(0x028), - REG(0x09c), - REG(0x0c0), - REG(0x178), - REG(0x17c), - REG16(0x358), - REG(0x170), - REG(0x150), - REG(0x154), - REG(0x158), - REG16(0x41c), - REG16(0x600), - REG16(0x604), - REG16(0x608), - REG16(0x60c), - REG16(0x610), - REG16(0x614), - REG16(0x618), - REG16(0x61c), - REG16(0x620), - REG16(0x624), - REG16(0x628), - REG16(0x62c), - REG16(0x630), - REG16(0x634), - REG16(0x638), - REG16(0x63c), - REG16(0x640), - REG16(0x644), - REG16(0x648), - REG16(0x64c), - REG16(0x650), - REG16(0x654), - REG16(0x658), - REG16(0x65c), - REG16(0x660), - REG16(0x664), - REG16(0x668), - REG16(0x66c), - REG16(0x670), - REG16(0x674), - REG16(0x678), - REG16(0x67c), - REG(0x068), - REG(0x084), - NOP(1), - - 0 -}; - -static const u8 xehp_rcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - 0 -}; - -static const u8 dg2_rcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - 
REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - 0 -}; - -static const u8 mtl_rcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - NOP(2), - LRI(2, POSTED), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - 0 -}; - -#define XE2_CTX_COMMON \ - NOP(1), /* [0x00] */ \ - LRI(15, POSTED), /* [0x01] */ \ - REG16(0x244), /* [0x02] CTXT_SR_CTL */ \ - REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \ - REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \ - REG(0x038), /* [0x08] RING_BUFFER_START */ \ - REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \ - REG(0x168), /* [0x0c] BB_ADDR_UDW */ \ - REG(0x140), /* [0x0e] BB_ADDR */ \ - REG(0x110), /* [0x10] BB_STATE */ \ - REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \ - REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \ - REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \ - REG(0x180), /* [0x18] CCID */ \ - REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \ - REG(0x120), /* [0x1c] PRT_BB_STATE */ \ - REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \ - \ - NOP(1), /* [0x20] */ \ - LRI(9, POSTED), /* [0x21] */ \ - REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \ - REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \ - REG(0x108), /* [0x26] 
INDIRECT_RING_STATE */ \ - REG16(0x284), /* [0x28] dummy reg */ \ - REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \ - REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \ - REG16(0x278), /* [0x2e] CS_CTX_ASID */ \ - REG16(0x274), /* [0x30] PTBP_UDW */ \ - REG16(0x270) /* [0x32] PTBP_LDW */ - -static const u8 xe2_rcs_offsets[] = { - XE2_CTX_COMMON, - - NOP(2), /* [0x34] */ - LRI(2, POSTED), /* [0x36] */ - REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */ - REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */ - - NOP(6), /* [0x41] */ - LRI(1, 0), /* [0x47] */ - REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */ - - 0 -}; - -static const u8 xe2_bcs_offsets[] = { - XE2_CTX_COMMON, - - NOP(4 + 8 + 1), /* [0x34] */ - LRI(2, POSTED), /* [0x41] */ - REG16(0x200), /* [0x42] BCS_SWCTRL */ - REG16(0x204), /* [0x44] BLIT_CCTL */ - - 0 -}; - -static const u8 xe2_xcs_offsets[] = { - XE2_CTX_COMMON, - - 0 -}; - -static const u8 xe2_indirect_ring_state_offsets[] = { - NOP(1), /* [0x00] */ - LRI(5, POSTED), /* [0x01] */ - REG(0x034), /* [0x02] RING_BUFFER_HEAD */ - REG(0x030), /* [0x04] RING_BUFFER_TAIL */ - REG(0x038), /* [0x06] RING_BUFFER_START */ - REG(0x048), /* [0x08] RING_BUFFER_START_UDW */ - REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ - - NOP(5), /* [0x0c] */ - LRI(9, POSTED), /* [0x11] */ - REG(0x168), /* [0x12] BB_ADDR_UDW */ - REG(0x140), /* [0x14] BB_ADDR */ - REG(0x110), /* [0x16] BB_STATE */ - REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */ - - NOP(12), /* [0x00] */ - - 0 -}; - -#undef REG16 -#undef REG -#undef LRI -#undef NOP - -static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) -{ - if (class == XE_ENGINE_CLASS_RENDER) { - if (GRAPHICS_VER(xe) >= 20) - return xe2_rcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1270) 
- return mtl_rcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1255) - return dg2_rcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1250) - return xehp_rcs_offsets; - else - return gen12_rcs_offsets; - } else if (class == XE_ENGINE_CLASS_COPY) { - if (GRAPHICS_VER(xe) >= 20) - return xe2_bcs_offsets; - else - return gen12_xcs_offsets; - } else { - if (GRAPHICS_VER(xe) >= 20) - return xe2_xcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1255) - return dg2_xcs_offsets; - else - return gen12_xcs_offsets; - } -} - -static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) -{ - regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - - if (xe_gt_has_indirect_ring_state(hwe->gt)) - regs[CTX_CONTEXT_CONTROL] |= - _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); - - /* TODO: Timestamp */ -} - -static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) -{ - struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq; - struct xe_device *xe = gt_to_xe(hwe->gt); - - if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe)) - return; - - regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM | - MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT; - regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; - regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); - - regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | - MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; - regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; - regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq); - regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; - regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq); -} - -static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) -{ - struct xe_device *xe = gt_to_xe(hwe->gt); - - if (GRAPHICS_VERx100(xe) >= 1250) - return 0x70; - else - return 0x60; -} - -static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) -{ - int x; - - x = 
lrc_ring_mi_mode(hwe); - regs[x + 1] &= ~STOP_RING; - regs[x + 1] |= STOP_RING << 16; -} - -static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc) -{ - return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE; -} - -static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) -{ - return 0; -} - -u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) -{ - return lrc->ring.size; -} - -/* Make the magic macros work */ -#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset -#define __xe_lrc_regs_offset xe_lrc_regs_offset - -#define LRC_SEQNO_PPHWSP_OFFSET 512 -#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) -#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) -#define LRC_PARALLEL_PPHWSP_OFFSET 2048 -#define LRC_PPHWSP_SIZE SZ_4K - -u32 xe_lrc_regs_offset(struct xe_lrc *lrc) -{ - return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; -} - -static size_t lrc_reg_size(struct xe_device *xe) -{ - if (GRAPHICS_VERx100(xe) >= 1250) - return 96 * sizeof(u32); - else - return 80 * sizeof(u32); -} - -size_t xe_lrc_skip_size(struct xe_device *xe) -{ - return LRC_PPHWSP_SIZE + lrc_reg_size(xe); -} - -static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) -{ - /* The seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET; -} - -static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) -{ - /* The start seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; -} - -static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) -{ - /* The start seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; -} - -static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) -{ - /* The parallel is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; -} - -static u32 
__xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) -{ - return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); -} - -static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) -{ - /* Indirect ring state page is at the very end of LRC */ - return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; -} - -#define DECL_MAP_ADDR_HELPERS(elem) \ -static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ -{ \ - struct iosys_map map = lrc->bo->vmap; \ -\ - xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \ - iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ - return map; \ -} \ -static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ -{ \ - return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ -} \ - -DECL_MAP_ADDR_HELPERS(ring) -DECL_MAP_ADDR_HELPERS(pphwsp) -DECL_MAP_ADDR_HELPERS(seqno) -DECL_MAP_ADDR_HELPERS(regs) -DECL_MAP_ADDR_HELPERS(start_seqno) -DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) -DECL_MAP_ADDR_HELPERS(ctx_timestamp) -DECL_MAP_ADDR_HELPERS(parallel) -DECL_MAP_ADDR_HELPERS(indirect_ring) - -#undef DECL_MAP_ADDR_HELPERS - -/** - * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address - * @lrc: Pointer to the lrc. - * - * Returns: ctx timestamp GGTT address - */ -u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_ctx_timestamp_ggtt_addr(lrc); -} - -/** - * xe_lrc_ctx_timestamp() - Read ctx timestamp value - * @lrc: Pointer to the lrc. - * - * Returns: ctx timestamp value - */ -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_ctx_timestamp_map(lrc); - return xe_map_read32(xe, &map); -} - -/** - * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address - * @lrc: Pointer to the lrc. 
- * - * Returns: ctx timestamp job GGTT address - */ -u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); -} - -/** - * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value - * @lrc: Pointer to the lrc. - * - * Returns: ctx timestamp job value - */ -u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_ctx_job_timestamp_map(lrc); - return xe_map_read32(xe, &map); -} - -u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_pphwsp_ggtt_addr(lrc); -} - -u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc) -{ - if (!xe_lrc_has_indirect_ring_state(lrc)) - return 0; - - return __xe_lrc_indirect_ring_ggtt_addr(lrc); -} - -static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_indirect_ring_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - return xe_map_read32(xe, &map); -} - -static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc, - int reg_nr, u32 val) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_indirect_ring_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - xe_map_write32(xe, &map, val); -} - -u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_regs_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - return xe_map_read32(xe, &map); -} - -void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_regs_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - xe_map_write32(xe, &map, val); -} - -static void *empty_lrc_data(struct xe_hw_engine *hwe) -{ - struct xe_gt *gt = hwe->gt; - void *data; - u32 *regs; - - data = kzalloc(xe_gt_lrc_size(gt, hwe->class), 
GFP_KERNEL); - if (!data) - return NULL; - - /* 1st page: Per-Process of HW status Page */ - regs = data + LRC_PPHWSP_SIZE; - set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe); - set_context_control(regs, hwe); - set_memory_based_intr(regs, hwe); - reset_stop_ring(regs, hwe); - if (xe_gt_has_indirect_ring_state(gt)) { - regs = data + xe_gt_lrc_size(gt, hwe->class) - - LRC_INDIRECT_RING_STATE_SIZE; - set_offsets(regs, xe2_indirect_ring_state_offsets, hwe); - } - - return data; -} - -static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) -{ - u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); - - xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); - xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); -} - -static void xe_lrc_finish(struct xe_lrc *lrc) -{ - xe_hw_fence_ctx_finish(&lrc->fence_ctx); - xe_bo_lock(lrc->bo, false); - xe_bo_unpin(lrc->bo); - xe_bo_unlock(lrc->bo); - xe_bo_put(lrc->bo); -} - -#define PVC_CTX_ASID (0x2e + 1) -#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) - -static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_vm *vm, u32 ring_size) -{ - struct xe_gt *gt = hwe->gt; - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = gt_to_xe(gt); - struct iosys_map map; - void *init_data = NULL; - u32 arb_enable; - u32 lrc_size; - int err; - - kref_init(&lrc->refcount); - lrc->flags = 0; - lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); - if (xe_gt_has_indirect_ring_state(gt)) - lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; - - /* - * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address - * via VM bind calls. 
- */ - lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); - if (IS_ERR(lrc->bo)) - return PTR_ERR(lrc->bo); - - lrc->size = lrc_size; - lrc->tile = gt_to_tile(hwe->gt); - lrc->ring.size = ring_size; - lrc->ring.tail = 0; - lrc->ctx_timestamp = 0; - - xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, - hwe->fence_irq, hwe->name); - - if (!gt->default_lrc[hwe->class]) { - init_data = empty_lrc_data(hwe); - if (!init_data) { - err = -ENOMEM; - goto err_lrc_finish; - } - } - - /* - * Init Per-Process of HW status Page, LRC / context state to known - * values - */ - map = __xe_lrc_pphwsp_map(lrc); - if (!init_data) { - xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ - xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, - gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); - } else { - xe_map_memcpy_to(xe, &map, 0, init_data, - xe_gt_lrc_size(gt, hwe->class)); - kfree(init_data); - } - - if (vm) { - xe_lrc_set_ppgtt(lrc, vm); - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, lrc->bo); - } - - if (xe_gt_has_indirect_ring_state(gt)) { - xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, - __xe_lrc_indirect_ring_ggtt_addr(lrc)); - - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, - __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL, - RING_CTL_SIZE(lrc->ring.size) | RING_VALID); - } else { - xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); - xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, - RING_CTL_SIZE(lrc->ring.size) | 
RING_VALID); - } - - xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); - - if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); - - lrc->desc = LRC_VALID; - lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); - /* TODO: Priority */ - - /* While this appears to have something about privileged batches or - * some such, it really just means PPGTT mode. - */ - if (vm) - lrc->desc |= LRC_PRIVILEGE; - - if (GRAPHICS_VERx100(xe) < 1250) { - lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance); - lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class); - } - - arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; - xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable)); - - map = __xe_lrc_seqno_map(lrc); - xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - - map = __xe_lrc_start_seqno_map(lrc); - xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - - return 0; - -err_lrc_finish: - xe_lrc_finish(lrc); - return err; -} - -/** - * xe_lrc_create - Create a LRC - * @hwe: Hardware Engine - * @vm: The VM (address space) - * @ring_size: LRC ring size - * - * Allocate and initialize the Logical Ring Context (LRC). - * - * Return pointer to created LRC upon success and an error pointer - * upon failure. - */ -struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size) -{ - struct xe_lrc *lrc; - int err; - - lrc = kzalloc(sizeof(*lrc), GFP_KERNEL); - if (!lrc) - return ERR_PTR(-ENOMEM); - - err = xe_lrc_init(lrc, hwe, vm, ring_size); - if (err) { - kfree(lrc); - return ERR_PTR(err); - } - - return lrc; -} - -/** - * xe_lrc_destroy - Destroy the LRC - * @ref: reference to LRC - * - * Called when ref == 0, release resources held by the Logical Ring Context - * (LRC) and free the LRC memory. 
- */ -void xe_lrc_destroy(struct kref *ref) -{ - struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount); - - xe_lrc_finish(lrc); - kfree(lrc); -} - -void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail); - else - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail); -} - -u32 xe_lrc_ring_tail(struct xe_lrc *lrc) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR; - else - return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; -} - -void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head); - else - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); -} - -u32 xe_lrc_ring_head(struct xe_lrc *lrc) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR; - else - return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; -} - -u32 xe_lrc_ring_space(struct xe_lrc *lrc) -{ - const u32 head = xe_lrc_ring_head(lrc); - const u32 tail = lrc->ring.tail; - const u32 size = lrc->ring.size; - - return ((head - tail - 1) & (size - 1)) + 1; -} - -static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, - const void *data, size_t size) -{ - struct xe_device *xe = lrc_to_xe(lrc); - - iosys_map_incr(&ring, lrc->ring.tail); - xe_map_memcpy_to(xe, &ring, 0, data, size); - lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1); -} - -void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map ring; - u32 rhs; - size_t aligned_size; - - xe_assert(xe, IS_ALIGNED(size, 4)); - aligned_size = ALIGN(size, 8); - - ring = __xe_lrc_ring_map(lrc); - - xe_assert(xe, lrc->ring.tail < lrc->ring.size); - rhs = lrc->ring.size - 
lrc->ring.tail; - if (size > rhs) { - __xe_lrc_write_ring(lrc, ring, data, rhs); - __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs); - } else { - __xe_lrc_write_ring(lrc, ring, data, size); - } - - if (aligned_size > size) { - u32 noop = MI_NOOP; - - __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop)); - } -} - -u64 xe_lrc_descriptor(struct xe_lrc *lrc) -{ - return lrc->desc | xe_lrc_ggtt_addr(lrc); -} - -u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_seqno_ggtt_addr(lrc); -} - -/** - * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence. - * - * Allocate but don't initialize an lrc seqno fence. - * - * Return: Pointer to the allocated fence or - * negative error pointer on error. - */ -struct dma_fence *xe_lrc_alloc_seqno_fence(void) -{ - return xe_hw_fence_alloc(); -} - -/** - * xe_lrc_free_seqno_fence() - Free an lrc seqno fence. - * @fence: Pointer to the fence to free. - * - * Frees an lrc seqno fence that hasn't yet been - * initialized. - */ -void xe_lrc_free_seqno_fence(struct dma_fence *fence) -{ - xe_hw_fence_free(fence); -} - -/** - * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence. - * @lrc: Pointer to the lrc. - * @fence: Pointer to the fence to initialize. - * - * Initializes a pre-allocated lrc seqno fence. - * After initialization, the fence is subject to normal - * dma-fence refcounting. 
- */ -void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence) -{ - xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc)); -} - -s32 xe_lrc_seqno(struct xe_lrc *lrc) -{ - struct iosys_map map = __xe_lrc_seqno_map(lrc); - - return xe_map_read32(lrc_to_xe(lrc), &map); -} - -s32 xe_lrc_start_seqno(struct xe_lrc *lrc) -{ - struct iosys_map map = __xe_lrc_start_seqno_map(lrc); - - return xe_map_read32(lrc_to_xe(lrc), &map); -} - -u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_start_seqno_ggtt_addr(lrc); -} - -u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_parallel_ggtt_addr(lrc); -} - -struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) -{ - return __xe_lrc_parallel_map(lrc); -} - -static int instr_dw(u32 cmd_header) -{ - /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ - if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) == - GFXPIPE_SINGLE_DW_CMD(0, 0)) - return 1; - - /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */ - if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST) - return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2; - - /* Most instructions have the # of dwords (minus 2) in 7:0 */ - return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2; -} - -static int dump_mi_command(struct drm_printer *p, - struct xe_gt *gt, - u32 *dw, - int remaining_dw) -{ - u32 inst_header = *dw; - u32 numdw = instr_dw(inst_header); - u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header); - int num_noop; - - /* First check for commands that don't have/use a '# DW' field */ - switch (inst_header & MI_OPCODE) { - case MI_NOOP: - num_noop = 1; - while (num_noop < remaining_dw && - (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) - num_noop++; - drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop); - return num_noop; - - case MI_TOPOLOGY_FILTER: - drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header); - return 1; - - case 
MI_BATCH_BUFFER_END: - drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header); - /* Return 'remaining_dw' to consume the rest of the LRC */ - return remaining_dw; - } - - /* - * Any remaining commands include a # of dwords. We should make sure - * it doesn't exceed the remaining size of the LRC. - */ - if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) - numdw = remaining_dw; - - switch (inst_header & MI_OPCODE) { - case MI_LOAD_REGISTER_IMM: - drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", - inst_header, (numdw - 1) / 2); - for (int i = 1; i < numdw; i += 2) - drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]); - return numdw; - - case MI_LOAD_REGISTER_MEM & MI_OPCODE: - drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", - inst_header, - dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "", - dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : ""); - if (numdw == 4) - drm_printf(p, " - %#6x = %#010llx\n", - dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); - else - drm_printf(p, " - %*ph (%s)\n", - (int)sizeof(u32) * (numdw - 1), dw + 1, - numdw < 4 ? "truncated" : "malformed"); - return numdw; - - case MI_FORCE_WAKEUP: - drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); - return numdw; - - default: - drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", - inst_header, opcode, numdw); - return numdw; - } -} - -static int dump_gfxpipe_command(struct drm_printer *p, - struct xe_gt *gt, - u32 *dw, - int remaining_dw) -{ - u32 numdw = instr_dw(*dw); - u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); - u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); - u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); - - /* - * Make sure we haven't mis-parsed a number of dwords that exceeds the - * remaining size of the LRC. 
- */ - if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) - numdw = remaining_dw; - - switch (*dw & GFXPIPE_MATCH_MASK) { -#define MATCH(cmd) \ - case cmd: \ - drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ - return numdw -#define MATCH3D(cmd) \ - case CMD_##cmd: \ - drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ - return numdw - - MATCH(STATE_BASE_ADDRESS); - MATCH(STATE_SIP); - MATCH(GPGPU_CSR_BASE_ADDRESS); - MATCH(STATE_COMPUTE_MODE); - MATCH3D(3DSTATE_BTD); - MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS); - MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS); - - MATCH3D(3DSTATE_VF_STATISTICS); - - MATCH(PIPELINE_SELECT); - - MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); - MATCH3D(3DSTATE_CLEAR_PARAMS); - MATCH3D(3DSTATE_DEPTH_BUFFER); - MATCH3D(3DSTATE_STENCIL_BUFFER); - MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); - MATCH3D(3DSTATE_VERTEX_BUFFERS); - MATCH3D(3DSTATE_VERTEX_ELEMENTS); - MATCH3D(3DSTATE_INDEX_BUFFER); - MATCH3D(3DSTATE_VF); - MATCH3D(3DSTATE_MULTISAMPLE); - MATCH3D(3DSTATE_CC_STATE_POINTERS); - MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); - MATCH3D(3DSTATE_VS); - MATCH3D(3DSTATE_GS); - MATCH3D(3DSTATE_CLIP); - MATCH3D(3DSTATE_SF); - MATCH3D(3DSTATE_WM); - MATCH3D(3DSTATE_CONSTANT_VS); - MATCH3D(3DSTATE_CONSTANT_GS); - MATCH3D(3DSTATE_CONSTANT_PS); - MATCH3D(3DSTATE_SAMPLE_MASK); - MATCH3D(3DSTATE_CONSTANT_HS); - MATCH3D(3DSTATE_CONSTANT_DS); - MATCH3D(3DSTATE_HS); - MATCH3D(3DSTATE_TE); - MATCH3D(3DSTATE_DS); - MATCH3D(3DSTATE_STREAMOUT); - MATCH3D(3DSTATE_SBE); - MATCH3D(3DSTATE_PS); - MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); - MATCH3D(3DSTATE_CPS_POINTERS); - MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); - MATCH3D(3DSTATE_BLEND_STATE_POINTERS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); - 
MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); - MATCH3D(3DSTATE_VF_INSTANCING); - MATCH3D(3DSTATE_VF_SGVS); - MATCH3D(3DSTATE_VF_TOPOLOGY); - MATCH3D(3DSTATE_WM_CHROMAKEY); - MATCH3D(3DSTATE_PS_BLEND); - MATCH3D(3DSTATE_WM_DEPTH_STENCIL); - MATCH3D(3DSTATE_PS_EXTRA); - MATCH3D(3DSTATE_RASTER); - MATCH3D(3DSTATE_SBE_SWIZ); - MATCH3D(3DSTATE_WM_HZ_OP); - MATCH3D(3DSTATE_VF_COMPONENT_PACKING); - MATCH3D(3DSTATE_VF_SGVS_2); - MATCH3D(3DSTATE_VFG); - MATCH3D(3DSTATE_URB_ALLOC_VS); - MATCH3D(3DSTATE_URB_ALLOC_HS); - MATCH3D(3DSTATE_URB_ALLOC_DS); - MATCH3D(3DSTATE_URB_ALLOC_GS); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_0); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_1); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_2); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_3); - MATCH3D(3DSTATE_PRIMITIVE_REPLICATION); - MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO); - MATCH3D(3DSTATE_AMFS); - MATCH3D(3DSTATE_DEPTH_BOUNDS); - MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); - MATCH3D(3DSTATE_CONSTANT_TS_POINTER); - MATCH3D(3DSTATE_MESH_CONTROL); - MATCH3D(3DSTATE_MESH_DISTRIB); - MATCH3D(3DSTATE_TASK_REDISTRIB); - MATCH3D(3DSTATE_MESH_SHADER); - MATCH3D(3DSTATE_MESH_SHADER_DATA); - MATCH3D(3DSTATE_TASK_CONTROL); - MATCH3D(3DSTATE_TASK_SHADER); - MATCH3D(3DSTATE_TASK_SHADER_DATA); - MATCH3D(3DSTATE_URB_ALLOC_MESH); - MATCH3D(3DSTATE_URB_ALLOC_TASK); - MATCH3D(3DSTATE_CLIP_MESH); - MATCH3D(3DSTATE_SBE_MESH); - MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); - - MATCH3D(3DSTATE_DRAWING_RECTANGLE); - MATCH3D(3DSTATE_CHROMA_KEY); - MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); - MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); - MATCH3D(3DSTATE_LINE_STIPPLE); - MATCH3D(3DSTATE_AA_LINE_PARAMETERS); - MATCH3D(3DSTATE_MONOFILTER_SIZE); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); - 
MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); - MATCH3D(3DSTATE_SO_DECL_LIST); - MATCH3D(3DSTATE_SO_BUFFER); - MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); - MATCH3D(3DSTATE_SAMPLE_PATTERN); - MATCH3D(3DSTATE_3D_MODE); - MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); - MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); - MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); - - default: - drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", - *dw, pipeline, opcode, subopcode, numdw); - return numdw; - } -} - -static int dump_gfx_state_command(struct drm_printer *p, - struct xe_gt *gt, - u32 *dw, - int remaining_dw) -{ - u32 numdw = instr_dw(*dw); - u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw); - - /* - * Make sure we haven't mis-parsed a number of dwords that exceeds the - * remaining size of the LRC. - */ - if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) - numdw = remaining_dw; - - switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) { - MATCH(STATE_WRITE_INLINE); - - default: - drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", - *dw, opcode, numdw); - return numdw; - } -} - -void xe_lrc_dump_default(struct drm_printer *p, - struct xe_gt *gt, - enum xe_engine_class hwe_class) -{ - u32 *dw; - int remaining_dw, num_dw; - - if (!gt->default_lrc[hwe_class]) { - drm_printf(p, "No default LRC for class %d\n", hwe_class); - return; - } - - /* - * Skip the beginning of the LRC since it contains the per-process - * hardware status page. 
- */ - dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; - remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; - - while (remaining_dw > 0) { - if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { - num_dw = dump_mi_command(p, gt, dw, remaining_dw); - } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { - num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); - } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) { - num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw); - } else { - num_dw = min(instr_dw(*dw), remaining_dw); - drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", - *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), - num_dw); - } - - dw += num_dw; - remaining_dw -= num_dw; - } -} - -struct instr_state { - u32 instr; - u16 num_dw; -}; - -static const struct instr_state xe_hpg_svg_state[] = { - { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 }, - { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 }, - { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 }, - { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 }, - { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 }, - { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 }, - { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 }, - { .instr = 
CMD_3DSTATE_SF, .num_dw = 4 }, - { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 }, - { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 }, - { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 }, - { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 }, - { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 }, - { .instr = CMD_3DSTATE_HS, .num_dw = 9 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 }, - { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 }, - { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 }, - { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 }, - { .instr = CMD_3DSTATE_TE, .num_dw = 5 }, - { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 }, - { .instr = CMD_3DSTATE_DS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_GS, .num_dw = 10 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 }, - { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 }, - { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 }, - { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 }, - { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, -}; - -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) -{ - struct xe_gt *gt = q->hwe->gt; - struct xe_device *xe = gt_to_xe(gt); - const struct instr_state *state_table = NULL; - int state_table_size = 0; - - /* - * Wa_14019789679 - * - * If the driver doesn't explicitly emit the 
SVG instructions while - * setting up the default LRC, the context switch will write 0's - * (noops) into the LRC memory rather than the expected instruction - * headers. Application contexts start out as a copy of the default - * LRC, and if they also do not emit specific settings for some SVG - * state, then on context restore they'll unintentionally inherit - * whatever state setting the previous context had programmed into the - * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will - * prevent the hardware from resetting that state back to any specific - * value). - * - * The official workaround only requires emitting 3DSTATE_MESH_CONTROL - * since that's a specific state setting that can easily cause GPU - * hangs if unintentionally inherited. However to be safe we'll - * continue to emit all of the SVG state since it's best not to leak - * any of the state between contexts, even if that leakage is harmless. - */ - if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { - state_table = xe_hpg_svg_state; - state_table_size = ARRAY_SIZE(xe_hpg_svg_state); - } - - if (!state_table) { - xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", - GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); - return; - } - - for (int i = 0; i < state_table_size; i++) { - u32 instr = state_table[i].instr; - u16 num_dw = state_table[i].num_dw; - bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW); - - xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE); - xe_gt_assert(gt, num_dw != 0); - xe_gt_assert(gt, is_single_dw ^ (num_dw > 1)); - - /* - * Xe2's SVG context is the same as the one on DG2 / MTL - * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has - * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined). - * Just make the replacement here rather than defining a - * whole separate table for the single trivial change. 
- */ - if (GRAPHICS_VER(xe) >= 20 && - instr == CMD_3DSTATE_DRAWING_RECTANGLE) - instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; - - bb->cs[bb->len] = instr; - if (!is_single_dw) - bb->cs[bb->len] |= (num_dw - 2); - - bb->len += num_dw; - } -} - -struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) -{ - struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT); - - if (!snapshot) - return NULL; - - if (lrc->bo->vm) - xe_vm_get(lrc->bo->vm); - - snapshot->context_desc = xe_lrc_ggtt_addr(lrc); - snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); - snapshot->head = xe_lrc_ring_head(lrc); - snapshot->tail.internal = lrc->ring.tail; - snapshot->tail.memory = xe_lrc_ring_tail(lrc); - snapshot->start_seqno = xe_lrc_start_seqno(lrc); - snapshot->seqno = xe_lrc_seqno(lrc); - snapshot->lrc_bo = xe_bo_get(lrc->bo); - snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; - snapshot->lrc_snapshot = NULL; - snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); - snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); - return snapshot; -} - -void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) -{ - struct xe_bo *bo; - struct xe_vm *vm; - struct iosys_map src; - - if (!snapshot) - return; - - bo = snapshot->lrc_bo; - vm = bo->vm; - snapshot->lrc_bo = NULL; - - snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); - if (!snapshot->lrc_snapshot) - goto put_bo; - - xe_bo_lock(bo, false); - if (!ttm_bo_vmap(&bo->ttm, &src)) { - xe_map_memcpy_from(xe_bo_device(bo), - snapshot->lrc_snapshot, &src, snapshot->lrc_offset, - snapshot->lrc_size); - ttm_bo_vunmap(&bo->ttm, &src); - } else { - kvfree(snapshot->lrc_snapshot); - snapshot->lrc_snapshot = NULL; - } - xe_bo_unlock(bo); -put_bo: - xe_bo_put(bo); - if (vm) - xe_vm_put(vm); -} - -void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) -{ - unsigned long i; - - if 
(!snapshot) - return; - - drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); - drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", - snapshot->indirect_context_desc); - drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); - drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", - snapshot->tail.internal, snapshot->tail.memory); - drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); - drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); - drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); - drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp); - - if (!snapshot->lrc_snapshot) - return; - - drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE); - drm_puts(p, "\t[HWSP].data: "); - for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) { - u32 *val = snapshot->lrc_snapshot + i; - char dumped[ASCII85_BUFSZ]; - - drm_puts(p, ascii85_encode(*val, dumped)); - } - - drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE); - drm_puts(p, "\t[HWCTX].data: "); - for (; i < snapshot->lrc_size; i += sizeof(u32)) { - u32 *val = snapshot->lrc_snapshot + i; - char dumped[ASCII85_BUFSZ]; - - drm_puts(p, ascii85_encode(*val, dumped)); - } - drm_puts(p, "\n"); -} - -void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) -{ - if (!snapshot) - return; - - kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) { - struct xe_vm *vm; - - vm = snapshot->lrc_bo->vm; - xe_bo_put(snapshot->lrc_bo); - if (vm) - xe_vm_put(vm); - } - kfree(snapshot); -} - -/** - * xe_lrc_update_timestamp() - Update ctx timestamp - * @lrc: Pointer to the lrc. - * @old_ts: Old timestamp value - * - * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and - * update saved value. 
- * - * Returns: New ctx timestamp value - */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) -{ - *old_ts = lrc->ctx_timestamp; - - lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); - - return lrc->ctx_timestamp; -} diff --git a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage b/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage deleted file mode 100644 index bee934c9371f..000000000000 --- a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage +++ /dev/null @@ -1,1784 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include "xe_lrc.h" - -#include <generated/xe_wa_oob.h> - -#include <linux/ascii85.h> - -#include "instructions/xe_mi_commands.h" -#include "instructions/xe_gfxpipe_commands.h" -#include "instructions/xe_gfx_state_commands.h" -#include "regs/xe_engine_regs.h" -#include "regs/xe_lrc_layout.h" -#include "xe_bb.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_exec_queue_types.h" -#include "xe_gt.h" -#include "xe_gt_printk.h" -#include "xe_hw_fence.h" -#include "xe_map.h" -#include "xe_memirq.h" -#include "xe_sriov.h" -#include "xe_vm.h" -#include "xe_wa.h" - -#define LRC_VALID BIT_ULL(0) -#define LRC_PRIVILEGE BIT_ULL(8) -#define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3) -#define LRC_LEGACY_64B_CONTEXT 3 - -#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) -#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) - -#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K - -struct xe_lrc_snapshot { - struct xe_bo *lrc_bo; - void *lrc_snapshot; - unsigned long lrc_size, lrc_offset; - - u32 context_desc; - u32 indirect_context_desc; - u32 head; - struct { - u32 internal; - u32 memory; - } tail; - u32 start_seqno; - u32 seqno; - u32 ctx_timestamp; - u32 ctx_job_timestamp; -}; - -static struct xe_device * -lrc_to_xe(struct xe_lrc *lrc) -{ - return gt_to_xe(lrc->fence_ctx.gt); -} - -size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) -{ - struct xe_device 
*xe = gt_to_xe(gt); - size_t size; - - switch (class) { - case XE_ENGINE_CLASS_RENDER: - if (GRAPHICS_VER(xe) >= 20) - size = 4 * SZ_4K; - else - size = 14 * SZ_4K; - break; - case XE_ENGINE_CLASS_COMPUTE: - /* 14 pages since graphics_ver == 11 */ - if (GRAPHICS_VER(xe) >= 20) - size = 3 * SZ_4K; - else - size = 14 * SZ_4K; - break; - default: - WARN(1, "Unknown engine class: %d", class); - fallthrough; - case XE_ENGINE_CLASS_COPY: - case XE_ENGINE_CLASS_VIDEO_DECODE: - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - case XE_ENGINE_CLASS_OTHER: - size = 2 * SZ_4K; - } - - /* Add indirect ring state page */ - if (xe_gt_has_indirect_ring_state(gt)) - size += LRC_INDIRECT_RING_STATE_SIZE; - - return size; -} - -/* - * The per-platform tables are u8-encoded in @data. Decode @data and set the - * addresses' offset and commands in @regs. The following encoding is used - * for each byte. There are 2 steps: decoding commands and decoding addresses. - * - * Commands: - * [7]: create NOPs - number of NOPs are set in lower bits - * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set - * MI_LRI_FORCE_POSTED - * [5:0]: Number of NOPs or registers to set values to in case of - * MI_LOAD_REGISTER_IMM - * - * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count" - * number of registers. They are set by using the REG/REG16 macros: the former - * is used for offsets smaller than 0x200 while the latter is for values bigger - * than that. Those macros already set all the bits documented below correctly: - * - * [7]: When a register offset needs more than 6 bits, use additional bytes, to - * follow, for the lower bits - * [6:0]: Register offset, without considering the engine base. - * - * This function only tweaks the commands and register offsets. Values are not - * filled out. 
- */ -static void set_offsets(u32 *regs, - const u8 *data, - const struct xe_hw_engine *hwe) -#define NOP(x) (BIT(7) | (x)) -#define LRI(count, flags) ((flags) << 6 | (count) | \ - BUILD_BUG_ON_ZERO(count >= BIT(6))) -#define POSTED BIT(0) -#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) -#define REG16(x) \ - (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ - (((x) >> 2) & 0x7f) -{ - const u32 base = hwe->mmio_base; - - while (*data) { - u8 count, flags; - - if (*data & BIT(7)) { /* skip */ - count = *data++ & ~BIT(7); - regs += count; - continue; - } - - count = *data & 0x3f; - flags = *data >> 6; - data++; - - *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); - if (flags & POSTED) - *regs |= MI_LRI_FORCE_POSTED; - *regs |= MI_LRI_LRM_CS_MMIO; - regs++; - - xe_gt_assert(hwe->gt, count); - do { - u32 offset = 0; - u8 v; - - do { - v = *data++; - offset <<= 7; - offset |= v & ~BIT(7); - } while (v & BIT(7)); - - regs[0] = base + (offset << 2); - regs += 2; - } while (--count); - } - - *regs = MI_BATCH_BUFFER_END | BIT(0); -} - -static const u8 gen12_xcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - 0 -}; - -static const u8 dg2_xcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - 0 -}; - -static const u8 
gen12_rcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - NOP(3 + 9 + 1), - - LRI(51, POSTED), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG(0x028), - REG(0x09c), - REG(0x0c0), - REG(0x178), - REG(0x17c), - REG16(0x358), - REG(0x170), - REG(0x150), - REG(0x154), - REG(0x158), - REG16(0x41c), - REG16(0x600), - REG16(0x604), - REG16(0x608), - REG16(0x60c), - REG16(0x610), - REG16(0x614), - REG16(0x618), - REG16(0x61c), - REG16(0x620), - REG16(0x624), - REG16(0x628), - REG16(0x62c), - REG16(0x630), - REG16(0x634), - REG16(0x638), - REG16(0x63c), - REG16(0x640), - REG16(0x644), - REG16(0x648), - REG16(0x64c), - REG16(0x650), - REG16(0x654), - REG16(0x658), - REG16(0x65c), - REG16(0x660), - REG16(0x664), - REG16(0x668), - REG16(0x66c), - REG16(0x670), - REG16(0x674), - REG16(0x678), - REG16(0x67c), - REG(0x068), - REG(0x084), - NOP(1), - - 0 -}; - -static const u8 xehp_rcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - 0 -}; - -static const u8 dg2_rcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - 
REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - 0 -}; - -static const u8 mtl_rcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - NOP(2), - LRI(2, POSTED), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - 0 -}; - -#define XE2_CTX_COMMON \ - NOP(1), /* [0x00] */ \ - LRI(15, POSTED), /* [0x01] */ \ - REG16(0x244), /* [0x02] CTXT_SR_CTL */ \ - REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \ - REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \ - REG(0x038), /* [0x08] RING_BUFFER_START */ \ - REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \ - REG(0x168), /* [0x0c] BB_ADDR_UDW */ \ - REG(0x140), /* [0x0e] BB_ADDR */ \ - REG(0x110), /* [0x10] BB_STATE */ \ - REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \ - REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \ - REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \ - REG(0x180), /* [0x18] CCID */ \ - REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \ - REG(0x120), /* [0x1c] PRT_BB_STATE */ \ - REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \ - \ - NOP(1), /* [0x20] */ \ - LRI(9, POSTED), /* [0x21] */ \ - REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \ - REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \ - REG(0x108), /* [0x26] 
INDIRECT_RING_STATE */ \ - REG16(0x284), /* [0x28] dummy reg */ \ - REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \ - REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \ - REG16(0x278), /* [0x2e] CS_CTX_ASID */ \ - REG16(0x274), /* [0x30] PTBP_UDW */ \ - REG16(0x270) /* [0x32] PTBP_LDW */ - -static const u8 xe2_rcs_offsets[] = { - XE2_CTX_COMMON, - - NOP(2), /* [0x34] */ - LRI(2, POSTED), /* [0x36] */ - REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */ - REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */ - - NOP(6), /* [0x41] */ - LRI(1, 0), /* [0x47] */ - REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */ - - 0 -}; - -static const u8 xe2_bcs_offsets[] = { - XE2_CTX_COMMON, - - NOP(4 + 8 + 1), /* [0x34] */ - LRI(2, POSTED), /* [0x41] */ - REG16(0x200), /* [0x42] BCS_SWCTRL */ - REG16(0x204), /* [0x44] BLIT_CCTL */ - - 0 -}; - -static const u8 xe2_xcs_offsets[] = { - XE2_CTX_COMMON, - - 0 -}; - -static const u8 xe2_indirect_ring_state_offsets[] = { - NOP(1), /* [0x00] */ - LRI(5, POSTED), /* [0x01] */ - REG(0x034), /* [0x02] RING_BUFFER_HEAD */ - REG(0x030), /* [0x04] RING_BUFFER_TAIL */ - REG(0x038), /* [0x06] RING_BUFFER_START */ - REG(0x048), /* [0x08] RING_BUFFER_START_UDW */ - REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ - - NOP(5), /* [0x0c] */ - LRI(9, POSTED), /* [0x11] */ - REG(0x168), /* [0x12] BB_ADDR_UDW */ - REG(0x140), /* [0x14] BB_ADDR */ - REG(0x110), /* [0x16] BB_STATE */ - REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */ - - NOP(12), /* [0x00] */ - - 0 -}; - -#undef REG16 -#undef REG -#undef LRI -#undef NOP - -static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) -{ - if (class == XE_ENGINE_CLASS_RENDER) { - if (GRAPHICS_VER(xe) >= 20) - return xe2_rcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1270) 
- return mtl_rcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1255) - return dg2_rcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1250) - return xehp_rcs_offsets; - else - return gen12_rcs_offsets; - } else if (class == XE_ENGINE_CLASS_COPY) { - if (GRAPHICS_VER(xe) >= 20) - return xe2_bcs_offsets; - else - return gen12_xcs_offsets; - } else { - if (GRAPHICS_VER(xe) >= 20) - return xe2_xcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1255) - return dg2_xcs_offsets; - else - return gen12_xcs_offsets; - } -} - -static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) -{ - regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - - if (xe_gt_has_indirect_ring_state(hwe->gt)) - regs[CTX_CONTEXT_CONTROL] |= - _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); - - /* TODO: Timestamp */ -} - -static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) -{ - struct xe_memirq *memirq = >_to_tile(hwe->gt)->sriov.vf.memirq; - struct xe_device *xe = gt_to_xe(hwe->gt); - - if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe)) - return; - - regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM | - MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT; - regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; - regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); - - regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | - MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; - regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; - regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq); - regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; - regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq); -} - -static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) -{ - struct xe_device *xe = gt_to_xe(hwe->gt); - - if (GRAPHICS_VERx100(xe) >= 1250) - return 0x70; - else - return 0x60; -} - -static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) -{ - int x; - - x = 
lrc_ring_mi_mode(hwe); - regs[x + 1] &= ~STOP_RING; - regs[x + 1] |= STOP_RING << 16; -} - -static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc) -{ - return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE; -} - -static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) -{ - return 0; -} - -u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) -{ - return lrc->ring.size; -} - -/* Make the magic macros work */ -#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset -#define __xe_lrc_regs_offset xe_lrc_regs_offset - -#define LRC_SEQNO_PPHWSP_OFFSET 512 -#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) -#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) -#define LRC_PARALLEL_PPHWSP_OFFSET 2048 -#define LRC_PPHWSP_SIZE SZ_4K - -u32 xe_lrc_regs_offset(struct xe_lrc *lrc) -{ - return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; -} - -static size_t lrc_reg_size(struct xe_device *xe) -{ - if (GRAPHICS_VERx100(xe) >= 1250) - return 96 * sizeof(u32); - else - return 80 * sizeof(u32); -} - -size_t xe_lrc_skip_size(struct xe_device *xe) -{ - return LRC_PPHWSP_SIZE + lrc_reg_size(xe); -} - -static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) -{ - /* The seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET; -} - -static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) -{ - /* The start seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; -} - -static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) -{ - /* The start seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; -} - -static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) -{ - /* The parallel is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; -} - -static u32 
__xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) -{ - return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); -} - -static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) -{ - /* Indirect ring state page is at the very end of LRC */ - return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; -} - -#define DECL_MAP_ADDR_HELPERS(elem) \ -static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ -{ \ - struct iosys_map map = lrc->bo->vmap; \ -\ - xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \ - iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ - return map; \ -} \ -static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ -{ \ - return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ -} \ - -DECL_MAP_ADDR_HELPERS(ring) -DECL_MAP_ADDR_HELPERS(pphwsp) -DECL_MAP_ADDR_HELPERS(seqno) -DECL_MAP_ADDR_HELPERS(regs) -DECL_MAP_ADDR_HELPERS(start_seqno) -DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) -DECL_MAP_ADDR_HELPERS(ctx_timestamp) -DECL_MAP_ADDR_HELPERS(parallel) -DECL_MAP_ADDR_HELPERS(indirect_ring) - -#undef DECL_MAP_ADDR_HELPERS - -/** - * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address - * @lrc: Pointer to the lrc. - * - * Returns: ctx timestamp GGTT address - */ -u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_ctx_timestamp_ggtt_addr(lrc); -} - -/** - * xe_lrc_ctx_timestamp() - Read ctx timestamp value - * @lrc: Pointer to the lrc. - * - * Returns: ctx timestamp value - */ -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_ctx_timestamp_map(lrc); - return xe_map_read32(xe, &map); -} - -/** - * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address - * @lrc: Pointer to the lrc. 
- * - * Returns: ctx timestamp job GGTT address - */ -u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); -} - -/** - * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value - * @lrc: Pointer to the lrc. - * - * Returns: ctx timestamp job value - */ -u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_ctx_job_timestamp_map(lrc); - return xe_map_read32(xe, &map); -} - -u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_pphwsp_ggtt_addr(lrc); -} - -u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc) -{ - if (!xe_lrc_has_indirect_ring_state(lrc)) - return 0; - - return __xe_lrc_indirect_ring_ggtt_addr(lrc); -} - -static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_indirect_ring_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - return xe_map_read32(xe, &map); -} - -static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc, - int reg_nr, u32 val) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_indirect_ring_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - xe_map_write32(xe, &map, val); -} - -u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_regs_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - return xe_map_read32(xe, &map); -} - -void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_regs_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - xe_map_write32(xe, &map, val); -} - -static void *empty_lrc_data(struct xe_hw_engine *hwe) -{ - struct xe_gt *gt = hwe->gt; - void *data; - u32 *regs; - - data = kzalloc(xe_gt_lrc_size(gt, hwe->class), 
GFP_KERNEL); - if (!data) - return NULL; - - /* 1st page: Per-Process of HW status Page */ - regs = data + LRC_PPHWSP_SIZE; - set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe); - set_context_control(regs, hwe); - set_memory_based_intr(regs, hwe); - reset_stop_ring(regs, hwe); - if (xe_gt_has_indirect_ring_state(gt)) { - regs = data + xe_gt_lrc_size(gt, hwe->class) - - LRC_INDIRECT_RING_STATE_SIZE; - set_offsets(regs, xe2_indirect_ring_state_offsets, hwe); - } - - return data; -} - -static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) -{ - u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); - - xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); - xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); -} - -static void xe_lrc_finish(struct xe_lrc *lrc) -{ - xe_hw_fence_ctx_finish(&lrc->fence_ctx); - xe_bo_lock(lrc->bo, false); - xe_bo_unpin(lrc->bo); - xe_bo_unlock(lrc->bo); - xe_bo_put(lrc->bo); -} - -#define PVC_CTX_ASID (0x2e + 1) -#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) - -static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_vm *vm, u32 ring_size) -{ - struct xe_gt *gt = hwe->gt; - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = gt_to_xe(gt); - struct iosys_map map; - void *init_data = NULL; - u32 arb_enable; - u32 lrc_size; - int err; - - kref_init(&lrc->refcount); - lrc->flags = 0; - lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); - if (xe_gt_has_indirect_ring_state(gt)) - lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; - - /* - * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address - * via VM bind calls. 
- */ - lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); - if (IS_ERR(lrc->bo)) - return PTR_ERR(lrc->bo); - - lrc->size = lrc_size; - lrc->tile = gt_to_tile(hwe->gt); - lrc->ring.size = ring_size; - lrc->ring.tail = 0; - lrc->ctx_timestamp = 0; - - xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, - hwe->fence_irq, hwe->name); - - if (!gt->default_lrc[hwe->class]) { - init_data = empty_lrc_data(hwe); - if (!init_data) { - err = -ENOMEM; - goto err_lrc_finish; - } - } - - /* - * Init Per-Process of HW status Page, LRC / context state to known - * values - */ - map = __xe_lrc_pphwsp_map(lrc); - if (!init_data) { - xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ - xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, - gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); - } else { - xe_map_memcpy_to(xe, &map, 0, init_data, - xe_gt_lrc_size(gt, hwe->class)); - kfree(init_data); - } - - if (vm) { - xe_lrc_set_ppgtt(lrc, vm); - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, lrc->bo); - } - - if (xe_gt_has_indirect_ring_state(gt)) { - xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, - __xe_lrc_indirect_ring_ggtt_addr(lrc)); - - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, - __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL, - RING_CTL_SIZE(lrc->ring.size) | RING_VALID); - } else { - xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); - xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, - RING_CTL_SIZE(lrc->ring.size) | 
RING_VALID); - } - - xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); - - if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); - - lrc->desc = LRC_VALID; - lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); - /* TODO: Priority */ - - /* While this appears to have something about privileged batches or - * some such, it really just means PPGTT mode. - */ - if (vm) - lrc->desc |= LRC_PRIVILEGE; - - if (GRAPHICS_VERx100(xe) < 1250) { - lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance); - lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class); - } - - arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; - xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable)); - - map = __xe_lrc_seqno_map(lrc); - xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - - map = __xe_lrc_start_seqno_map(lrc); - xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - - return 0; - -err_lrc_finish: - xe_lrc_finish(lrc); - return err; -} - -/** - * xe_lrc_create - Create a LRC - * @hwe: Hardware Engine - * @vm: The VM (address space) - * @ring_size: LRC ring size - * - * Allocate and initialize the Logical Ring Context (LRC). - * - * Return pointer to created LRC upon success and an error pointer - * upon failure. - */ -struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size) -{ - struct xe_lrc *lrc; - int err; - - lrc = kzalloc(sizeof(*lrc), GFP_KERNEL); - if (!lrc) - return ERR_PTR(-ENOMEM); - - err = xe_lrc_init(lrc, hwe, vm, ring_size); - if (err) { - kfree(lrc); - return ERR_PTR(err); - } - - return lrc; -} - -/** - * xe_lrc_destroy - Destroy the LRC - * @ref: reference to LRC - * - * Called when ref == 0, release resources held by the Logical Ring Context - * (LRC) and free the LRC memory. 
- */ -void xe_lrc_destroy(struct kref *ref) -{ - struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount); - - xe_lrc_finish(lrc); - kfree(lrc); -} - -void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail); - else - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail); -} - -u32 xe_lrc_ring_tail(struct xe_lrc *lrc) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR; - else - return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; -} - -void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head); - else - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); -} - -u32 xe_lrc_ring_head(struct xe_lrc *lrc) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR; - else - return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; -} - -u32 xe_lrc_ring_space(struct xe_lrc *lrc) -{ - const u32 head = xe_lrc_ring_head(lrc); - const u32 tail = lrc->ring.tail; - const u32 size = lrc->ring.size; - - return ((head - tail - 1) & (size - 1)) + 1; -} - -static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, - const void *data, size_t size) -{ - struct xe_device *xe = lrc_to_xe(lrc); - - iosys_map_incr(&ring, lrc->ring.tail); - xe_map_memcpy_to(xe, &ring, 0, data, size); - lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1); -} - -void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map ring; - u32 rhs; - size_t aligned_size; - - xe_assert(xe, IS_ALIGNED(size, 4)); - aligned_size = ALIGN(size, 8); - - ring = __xe_lrc_ring_map(lrc); - - xe_assert(xe, lrc->ring.tail < lrc->ring.size); - rhs = lrc->ring.size - 
lrc->ring.tail; - if (size > rhs) { - __xe_lrc_write_ring(lrc, ring, data, rhs); - __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs); - } else { - __xe_lrc_write_ring(lrc, ring, data, size); - } - - if (aligned_size > size) { - u32 noop = MI_NOOP; - - __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop)); - } -} - -u64 xe_lrc_descriptor(struct xe_lrc *lrc) -{ - return lrc->desc | xe_lrc_ggtt_addr(lrc); -} - -u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_seqno_ggtt_addr(lrc); -} - -/** - * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence. - * - * Allocate but don't initialize an lrc seqno fence. - * - * Return: Pointer to the allocated fence or - * negative error pointer on error. - */ -struct dma_fence *xe_lrc_alloc_seqno_fence(void) -{ - return xe_hw_fence_alloc(); -} - -/** - * xe_lrc_free_seqno_fence() - Free an lrc seqno fence. - * @fence: Pointer to the fence to free. - * - * Frees an lrc seqno fence that hasn't yet been - * initialized. - */ -void xe_lrc_free_seqno_fence(struct dma_fence *fence) -{ - xe_hw_fence_free(fence); -} - -/** - * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence. - * @lrc: Pointer to the lrc. - * @fence: Pointer to the fence to initialize. - * - * Initializes a pre-allocated lrc seqno fence. - * After initialization, the fence is subject to normal - * dma-fence refcounting. 
- */ -void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence) -{ - xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc)); -} - -s32 xe_lrc_seqno(struct xe_lrc *lrc) -{ - struct iosys_map map = __xe_lrc_seqno_map(lrc); - - return xe_map_read32(lrc_to_xe(lrc), &map); -} - -s32 xe_lrc_start_seqno(struct xe_lrc *lrc) -{ - struct iosys_map map = __xe_lrc_start_seqno_map(lrc); - - return xe_map_read32(lrc_to_xe(lrc), &map); -} - -u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_start_seqno_ggtt_addr(lrc); -} - -u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_parallel_ggtt_addr(lrc); -} - -struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) -{ - return __xe_lrc_parallel_map(lrc); -} - -static int instr_dw(u32 cmd_header) -{ - /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ - if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) == - GFXPIPE_SINGLE_DW_CMD(0, 0)) - return 1; - - /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */ - if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST) - return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2; - - /* Most instructions have the # of dwords (minus 2) in 7:0 */ - return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2; -} - -static int dump_mi_command(struct drm_printer *p, - struct xe_gt *gt, - u32 *dw, - int remaining_dw) -{ - u32 inst_header = *dw; - u32 numdw = instr_dw(inst_header); - u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header); - int num_noop; - - /* First check for commands that don't have/use a '# DW' field */ - switch (inst_header & MI_OPCODE) { - case MI_NOOP: - num_noop = 1; - while (num_noop < remaining_dw && - (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) - num_noop++; - drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop); - return num_noop; - - case MI_TOPOLOGY_FILTER: - drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header); - return 1; - - case 
MI_BATCH_BUFFER_END: - drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header); - /* Return 'remaining_dw' to consume the rest of the LRC */ - return remaining_dw; - } - - /* - * Any remaining commands include a # of dwords. We should make sure - * it doesn't exceed the remaining size of the LRC. - */ - if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) - numdw = remaining_dw; - - switch (inst_header & MI_OPCODE) { - case MI_LOAD_REGISTER_IMM: - drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", - inst_header, (numdw - 1) / 2); - for (int i = 1; i < numdw; i += 2) - drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]); - return numdw; - - case MI_LOAD_REGISTER_MEM & MI_OPCODE: - drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", - inst_header, - dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "", - dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : ""); - if (numdw == 4) - drm_printf(p, " - %#6x = %#010llx\n", - dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); - else - drm_printf(p, " - %*ph (%s)\n", - (int)sizeof(u32) * (numdw - 1), dw + 1, - numdw < 4 ? "truncated" : "malformed"); - return numdw; - - case MI_FORCE_WAKEUP: - drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); - return numdw; - - default: - drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", - inst_header, opcode, numdw); - return numdw; - } -} - -static int dump_gfxpipe_command(struct drm_printer *p, - struct xe_gt *gt, - u32 *dw, - int remaining_dw) -{ - u32 numdw = instr_dw(*dw); - u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); - u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); - u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); - - /* - * Make sure we haven't mis-parsed a number of dwords that exceeds the - * remaining size of the LRC. 
- */ - if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) - numdw = remaining_dw; - - switch (*dw & GFXPIPE_MATCH_MASK) { -#define MATCH(cmd) \ - case cmd: \ - drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ - return numdw -#define MATCH3D(cmd) \ - case CMD_##cmd: \ - drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ - return numdw - - MATCH(STATE_BASE_ADDRESS); - MATCH(STATE_SIP); - MATCH(GPGPU_CSR_BASE_ADDRESS); - MATCH(STATE_COMPUTE_MODE); - MATCH3D(3DSTATE_BTD); - MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS); - MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS); - - MATCH3D(3DSTATE_VF_STATISTICS); - - MATCH(PIPELINE_SELECT); - - MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); - MATCH3D(3DSTATE_CLEAR_PARAMS); - MATCH3D(3DSTATE_DEPTH_BUFFER); - MATCH3D(3DSTATE_STENCIL_BUFFER); - MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); - MATCH3D(3DSTATE_VERTEX_BUFFERS); - MATCH3D(3DSTATE_VERTEX_ELEMENTS); - MATCH3D(3DSTATE_INDEX_BUFFER); - MATCH3D(3DSTATE_VF); - MATCH3D(3DSTATE_MULTISAMPLE); - MATCH3D(3DSTATE_CC_STATE_POINTERS); - MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); - MATCH3D(3DSTATE_VS); - MATCH3D(3DSTATE_GS); - MATCH3D(3DSTATE_CLIP); - MATCH3D(3DSTATE_SF); - MATCH3D(3DSTATE_WM); - MATCH3D(3DSTATE_CONSTANT_VS); - MATCH3D(3DSTATE_CONSTANT_GS); - MATCH3D(3DSTATE_CONSTANT_PS); - MATCH3D(3DSTATE_SAMPLE_MASK); - MATCH3D(3DSTATE_CONSTANT_HS); - MATCH3D(3DSTATE_CONSTANT_DS); - MATCH3D(3DSTATE_HS); - MATCH3D(3DSTATE_TE); - MATCH3D(3DSTATE_DS); - MATCH3D(3DSTATE_STREAMOUT); - MATCH3D(3DSTATE_SBE); - MATCH3D(3DSTATE_PS); - MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); - MATCH3D(3DSTATE_CPS_POINTERS); - MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); - MATCH3D(3DSTATE_BLEND_STATE_POINTERS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); - 
MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); - MATCH3D(3DSTATE_VF_INSTANCING); - MATCH3D(3DSTATE_VF_SGVS); - MATCH3D(3DSTATE_VF_TOPOLOGY); - MATCH3D(3DSTATE_WM_CHROMAKEY); - MATCH3D(3DSTATE_PS_BLEND); - MATCH3D(3DSTATE_WM_DEPTH_STENCIL); - MATCH3D(3DSTATE_PS_EXTRA); - MATCH3D(3DSTATE_RASTER); - MATCH3D(3DSTATE_SBE_SWIZ); - MATCH3D(3DSTATE_WM_HZ_OP); - MATCH3D(3DSTATE_VF_COMPONENT_PACKING); - MATCH3D(3DSTATE_VF_SGVS_2); - MATCH3D(3DSTATE_VFG); - MATCH3D(3DSTATE_URB_ALLOC_VS); - MATCH3D(3DSTATE_URB_ALLOC_HS); - MATCH3D(3DSTATE_URB_ALLOC_DS); - MATCH3D(3DSTATE_URB_ALLOC_GS); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_0); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_1); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_2); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_3); - MATCH3D(3DSTATE_PRIMITIVE_REPLICATION); - MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO); - MATCH3D(3DSTATE_AMFS); - MATCH3D(3DSTATE_DEPTH_BOUNDS); - MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); - MATCH3D(3DSTATE_CONSTANT_TS_POINTER); - MATCH3D(3DSTATE_MESH_CONTROL); - MATCH3D(3DSTATE_MESH_DISTRIB); - MATCH3D(3DSTATE_TASK_REDISTRIB); - MATCH3D(3DSTATE_MESH_SHADER); - MATCH3D(3DSTATE_MESH_SHADER_DATA); - MATCH3D(3DSTATE_TASK_CONTROL); - MATCH3D(3DSTATE_TASK_SHADER); - MATCH3D(3DSTATE_TASK_SHADER_DATA); - MATCH3D(3DSTATE_URB_ALLOC_MESH); - MATCH3D(3DSTATE_URB_ALLOC_TASK); - MATCH3D(3DSTATE_CLIP_MESH); - MATCH3D(3DSTATE_SBE_MESH); - MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); - - MATCH3D(3DSTATE_DRAWING_RECTANGLE); - MATCH3D(3DSTATE_CHROMA_KEY); - MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); - MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); - MATCH3D(3DSTATE_LINE_STIPPLE); - MATCH3D(3DSTATE_AA_LINE_PARAMETERS); - MATCH3D(3DSTATE_MONOFILTER_SIZE); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); - 
MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); - MATCH3D(3DSTATE_SO_DECL_LIST); - MATCH3D(3DSTATE_SO_BUFFER); - MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); - MATCH3D(3DSTATE_SAMPLE_PATTERN); - MATCH3D(3DSTATE_3D_MODE); - MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); - MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); - MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); - - default: - drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", - *dw, pipeline, opcode, subopcode, numdw); - return numdw; - } -} - -static int dump_gfx_state_command(struct drm_printer *p, - struct xe_gt *gt, - u32 *dw, - int remaining_dw) -{ - u32 numdw = instr_dw(*dw); - u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw); - - /* - * Make sure we haven't mis-parsed a number of dwords that exceeds the - * remaining size of the LRC. - */ - if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) - numdw = remaining_dw; - - switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) { - MATCH(STATE_WRITE_INLINE); - - default: - drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", - *dw, opcode, numdw); - return numdw; - } -} - -void xe_lrc_dump_default(struct drm_printer *p, - struct xe_gt *gt, - enum xe_engine_class hwe_class) -{ - u32 *dw; - int remaining_dw, num_dw; - - if (!gt->default_lrc[hwe_class]) { - drm_printf(p, "No default LRC for class %d\n", hwe_class); - return; - } - - /* - * Skip the beginning of the LRC since it contains the per-process - * hardware status page. 
- */ - dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; - remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; - - while (remaining_dw > 0) { - if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { - num_dw = dump_mi_command(p, gt, dw, remaining_dw); - } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { - num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); - } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) { - num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw); - } else { - num_dw = min(instr_dw(*dw), remaining_dw); - drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", - *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), - num_dw); - } - - dw += num_dw; - remaining_dw -= num_dw; - } -} - -struct instr_state { - u32 instr; - u16 num_dw; -}; - -static const struct instr_state xe_hpg_svg_state[] = { - { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 }, - { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 }, - { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 }, - { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 }, - { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 }, - { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 }, - { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 }, - { .instr = 
CMD_3DSTATE_SF, .num_dw = 4 }, - { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 }, - { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 }, - { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 }, - { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 }, - { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 }, - { .instr = CMD_3DSTATE_HS, .num_dw = 9 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 }, - { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 }, - { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 }, - { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 }, - { .instr = CMD_3DSTATE_TE, .num_dw = 5 }, - { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 }, - { .instr = CMD_3DSTATE_DS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_GS, .num_dw = 10 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 }, - { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 }, - { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 }, - { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 }, - { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, -}; - -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) -{ - struct xe_gt *gt = q->hwe->gt; - struct xe_device *xe = gt_to_xe(gt); - const struct instr_state *state_table = NULL; - int state_table_size = 0; - - /* - * Wa_14019789679 - * - * If the driver doesn't explicitly emit the 
SVG instructions while - * setting up the default LRC, the context switch will write 0's - * (noops) into the LRC memory rather than the expected instruction - * headers. Application contexts start out as a copy of the default - * LRC, and if they also do not emit specific settings for some SVG - * state, then on context restore they'll unintentionally inherit - * whatever state setting the previous context had programmed into the - * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will - * prevent the hardware from resetting that state back to any specific - * value). - * - * The official workaround only requires emitting 3DSTATE_MESH_CONTROL - * since that's a specific state setting that can easily cause GPU - * hangs if unintentionally inherited. However to be safe we'll - * continue to emit all of the SVG state since it's best not to leak - * any of the state between contexts, even if that leakage is harmless. - */ - if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { - state_table = xe_hpg_svg_state; - state_table_size = ARRAY_SIZE(xe_hpg_svg_state); - } - - if (!state_table) { - xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", - GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); - return; - } - - for (int i = 0; i < state_table_size; i++) { - u32 instr = state_table[i].instr; - u16 num_dw = state_table[i].num_dw; - bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW); - - xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE); - xe_gt_assert(gt, num_dw != 0); - xe_gt_assert(gt, is_single_dw ^ (num_dw > 1)); - - /* - * Xe2's SVG context is the same as the one on DG2 / MTL - * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has - * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined). - * Just make the replacement here rather than defining a - * whole separate table for the single trivial change. 
- */ - if (GRAPHICS_VER(xe) >= 20 && - instr == CMD_3DSTATE_DRAWING_RECTANGLE) - instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; - - bb->cs[bb->len] = instr; - if (!is_single_dw) - bb->cs[bb->len] |= (num_dw - 2); - - bb->len += num_dw; - } -} - -struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) -{ - struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT); - - if (!snapshot) - return NULL; - -<<<<<<< - if (lrc->bo && lrc->bo->vm) -======= - if (lrc->bo->vm) ->>>>>>> - xe_vm_get(lrc->bo->vm); - - snapshot->context_desc = xe_lrc_ggtt_addr(lrc); - snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); - snapshot->head = xe_lrc_ring_head(lrc); - snapshot->tail.internal = lrc->ring.tail; - snapshot->tail.memory = xe_lrc_ring_tail(lrc); - snapshot->start_seqno = xe_lrc_start_seqno(lrc); - snapshot->seqno = xe_lrc_seqno(lrc); - snapshot->lrc_bo = xe_bo_get(lrc->bo); - snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; - snapshot->lrc_snapshot = NULL; - snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); - snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); - return snapshot; -} - -void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) -{ - struct xe_bo *bo; - struct xe_vm *vm; - struct iosys_map src; - - if (!snapshot) - return; - - bo = snapshot->lrc_bo; - vm = bo->vm; - snapshot->lrc_bo = NULL; - - snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); - if (!snapshot->lrc_snapshot) - goto put_bo; - - xe_bo_lock(bo, false); - if (!ttm_bo_vmap(&bo->ttm, &src)) { - xe_map_memcpy_from(xe_bo_device(bo), - snapshot->lrc_snapshot, &src, snapshot->lrc_offset, - snapshot->lrc_size); - ttm_bo_vunmap(&bo->ttm, &src); - } else { - kvfree(snapshot->lrc_snapshot); - snapshot->lrc_snapshot = NULL; - } - xe_bo_unlock(bo); -put_bo: - xe_bo_put(bo); - if (vm) - xe_vm_put(vm); -} - -void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, 
struct drm_printer *p) -{ - unsigned long i; - - if (!snapshot) - return; - - drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); - drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", - snapshot->indirect_context_desc); - drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); - drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", - snapshot->tail.internal, snapshot->tail.memory); - drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); - drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); - drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); - drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp); - - if (!snapshot->lrc_snapshot) - return; - - drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE); - drm_puts(p, "\t[HWSP].data: "); - for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) { - u32 *val = snapshot->lrc_snapshot + i; - char dumped[ASCII85_BUFSZ]; - - drm_puts(p, ascii85_encode(*val, dumped)); - } - - drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE); - drm_puts(p, "\t[HWCTX].data: "); - for (; i < snapshot->lrc_size; i += sizeof(u32)) { - u32 *val = snapshot->lrc_snapshot + i; - char dumped[ASCII85_BUFSZ]; - - drm_puts(p, ascii85_encode(*val, dumped)); - } - drm_puts(p, "\n"); -} - -void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) -{ - if (!snapshot) - return; - - kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) { - struct xe_vm *vm; - - vm = snapshot->lrc_bo->vm; - xe_bo_put(snapshot->lrc_bo); - if (vm) - xe_vm_put(vm); - } - kfree(snapshot); -} - -/** - * xe_lrc_update_timestamp() - Update ctx timestamp - * @lrc: Pointer to the lrc. - * @old_ts: Old timestamp value - * - * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and - * update saved value. 
- * - * Returns: New ctx timestamp value - */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) -{ - *old_ts = lrc->ctx_timestamp; - - lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); - - return lrc->ctx_timestamp; -} diff --git a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.1 b/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.1 deleted file mode 100644 index bee934c9371f..000000000000 --- a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.1 +++ /dev/null @@ -1,1784 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include "xe_lrc.h" - -#include <generated/xe_wa_oob.h> - -#include <linux/ascii85.h> - -#include "instructions/xe_mi_commands.h" -#include "instructions/xe_gfxpipe_commands.h" -#include "instructions/xe_gfx_state_commands.h" -#include "regs/xe_engine_regs.h" -#include "regs/xe_lrc_layout.h" -#include "xe_bb.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_exec_queue_types.h" -#include "xe_gt.h" -#include "xe_gt_printk.h" -#include "xe_hw_fence.h" -#include "xe_map.h" -#include "xe_memirq.h" -#include "xe_sriov.h" -#include "xe_vm.h" -#include "xe_wa.h" - -#define LRC_VALID BIT_ULL(0) -#define LRC_PRIVILEGE BIT_ULL(8) -#define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3) -#define LRC_LEGACY_64B_CONTEXT 3 - -#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) -#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) - -#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K - -struct xe_lrc_snapshot { - struct xe_bo *lrc_bo; - void *lrc_snapshot; - unsigned long lrc_size, lrc_offset; - - u32 context_desc; - u32 indirect_context_desc; - u32 head; - struct { - u32 internal; - u32 memory; - } tail; - u32 start_seqno; - u32 seqno; - u32 ctx_timestamp; - u32 ctx_job_timestamp; -}; - -static struct xe_device * -lrc_to_xe(struct xe_lrc *lrc) -{ - return gt_to_xe(lrc->fence_ctx.gt); -} - -size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) -{ - struct 
xe_device *xe = gt_to_xe(gt); - size_t size; - - switch (class) { - case XE_ENGINE_CLASS_RENDER: - if (GRAPHICS_VER(xe) >= 20) - size = 4 * SZ_4K; - else - size = 14 * SZ_4K; - break; - case XE_ENGINE_CLASS_COMPUTE: - /* 14 pages since graphics_ver == 11 */ - if (GRAPHICS_VER(xe) >= 20) - size = 3 * SZ_4K; - else - size = 14 * SZ_4K; - break; - default: - WARN(1, "Unknown engine class: %d", class); - fallthrough; - case XE_ENGINE_CLASS_COPY: - case XE_ENGINE_CLASS_VIDEO_DECODE: - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - case XE_ENGINE_CLASS_OTHER: - size = 2 * SZ_4K; - } - - /* Add indirect ring state page */ - if (xe_gt_has_indirect_ring_state(gt)) - size += LRC_INDIRECT_RING_STATE_SIZE; - - return size; -} - -/* - * The per-platform tables are u8-encoded in @data. Decode @data and set the - * addresses' offset and commands in @regs. The following encoding is used - * for each byte. There are 2 steps: decoding commands and decoding addresses. - * - * Commands: - * [7]: create NOPs - number of NOPs are set in lower bits - * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set - * MI_LRI_FORCE_POSTED - * [5:0]: Number of NOPs or registers to set values to in case of - * MI_LOAD_REGISTER_IMM - * - * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count" - * number of registers. They are set by using the REG/REG16 macros: the former - * is used for offsets smaller than 0x200 while the latter is for values bigger - * than that. Those macros already set all the bits documented below correctly: - * - * [7]: When a register offset needs more than 6 bits, use additional bytes, to - * follow, for the lower bits - * [6:0]: Register offset, without considering the engine base. - * - * This function only tweaks the commands and register offsets. Values are not - * filled out. 
- */ -static void set_offsets(u32 *regs, - const u8 *data, - const struct xe_hw_engine *hwe) -#define NOP(x) (BIT(7) | (x)) -#define LRI(count, flags) ((flags) << 6 | (count) | \ - BUILD_BUG_ON_ZERO(count >= BIT(6))) -#define POSTED BIT(0) -#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) -#define REG16(x) \ - (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ - (((x) >> 2) & 0x7f) -{ - const u32 base = hwe->mmio_base; - - while (*data) { - u8 count, flags; - - if (*data & BIT(7)) { /* skip */ - count = *data++ & ~BIT(7); - regs += count; - continue; - } - - count = *data & 0x3f; - flags = *data >> 6; - data++; - - *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); - if (flags & POSTED) - *regs |= MI_LRI_FORCE_POSTED; - *regs |= MI_LRI_LRM_CS_MMIO; - regs++; - - xe_gt_assert(hwe->gt, count); - do { - u32 offset = 0; - u8 v; - - do { - v = *data++; - offset <<= 7; - offset |= v & ~BIT(7); - } while (v & BIT(7)); - - regs[0] = base + (offset << 2); - regs += 2; - } while (--count); - } - - *regs = MI_BATCH_BUFFER_END | BIT(0); -} - -static const u8 gen12_xcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - 0 -}; - -static const u8 dg2_xcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - 0 -}; - -static const u8 
gen12_rcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - NOP(3 + 9 + 1), - - LRI(51, POSTED), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG(0x028), - REG(0x09c), - REG(0x0c0), - REG(0x178), - REG(0x17c), - REG16(0x358), - REG(0x170), - REG(0x150), - REG(0x154), - REG(0x158), - REG16(0x41c), - REG16(0x600), - REG16(0x604), - REG16(0x608), - REG16(0x60c), - REG16(0x610), - REG16(0x614), - REG16(0x618), - REG16(0x61c), - REG16(0x620), - REG16(0x624), - REG16(0x628), - REG16(0x62c), - REG16(0x630), - REG16(0x634), - REG16(0x638), - REG16(0x63c), - REG16(0x640), - REG16(0x644), - REG16(0x648), - REG16(0x64c), - REG16(0x650), - REG16(0x654), - REG16(0x658), - REG16(0x65c), - REG16(0x660), - REG16(0x664), - REG16(0x668), - REG16(0x66c), - REG16(0x670), - REG16(0x674), - REG16(0x678), - REG16(0x67c), - REG(0x068), - REG(0x084), - NOP(1), - - 0 -}; - -static const u8 xehp_rcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - 0 -}; - -static const u8 dg2_rcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - 
REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - 0 -}; - -static const u8 mtl_rcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - NOP(2), - LRI(2, POSTED), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - 0 -}; - -#define XE2_CTX_COMMON \ - NOP(1), /* [0x00] */ \ - LRI(15, POSTED), /* [0x01] */ \ - REG16(0x244), /* [0x02] CTXT_SR_CTL */ \ - REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \ - REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \ - REG(0x038), /* [0x08] RING_BUFFER_START */ \ - REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \ - REG(0x168), /* [0x0c] BB_ADDR_UDW */ \ - REG(0x140), /* [0x0e] BB_ADDR */ \ - REG(0x110), /* [0x10] BB_STATE */ \ - REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \ - REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \ - REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \ - REG(0x180), /* [0x18] CCID */ \ - REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \ - REG(0x120), /* [0x1c] PRT_BB_STATE */ \ - REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \ - \ - NOP(1), /* [0x20] */ \ - LRI(9, POSTED), /* [0x21] */ \ - REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \ - REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \ - REG(0x108), /* [0x26] 
INDIRECT_RING_STATE */ \ - REG16(0x284), /* [0x28] dummy reg */ \ - REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \ - REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \ - REG16(0x278), /* [0x2e] CS_CTX_ASID */ \ - REG16(0x274), /* [0x30] PTBP_UDW */ \ - REG16(0x270) /* [0x32] PTBP_LDW */ - -static const u8 xe2_rcs_offsets[] = { - XE2_CTX_COMMON, - - NOP(2), /* [0x34] */ - LRI(2, POSTED), /* [0x36] */ - REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */ - REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */ - - NOP(6), /* [0x41] */ - LRI(1, 0), /* [0x47] */ - REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */ - - 0 -}; - -static const u8 xe2_bcs_offsets[] = { - XE2_CTX_COMMON, - - NOP(4 + 8 + 1), /* [0x34] */ - LRI(2, POSTED), /* [0x41] */ - REG16(0x200), /* [0x42] BCS_SWCTRL */ - REG16(0x204), /* [0x44] BLIT_CCTL */ - - 0 -}; - -static const u8 xe2_xcs_offsets[] = { - XE2_CTX_COMMON, - - 0 -}; - -static const u8 xe2_indirect_ring_state_offsets[] = { - NOP(1), /* [0x00] */ - LRI(5, POSTED), /* [0x01] */ - REG(0x034), /* [0x02] RING_BUFFER_HEAD */ - REG(0x030), /* [0x04] RING_BUFFER_TAIL */ - REG(0x038), /* [0x06] RING_BUFFER_START */ - REG(0x048), /* [0x08] RING_BUFFER_START_UDW */ - REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ - - NOP(5), /* [0x0c] */ - LRI(9, POSTED), /* [0x11] */ - REG(0x168), /* [0x12] BB_ADDR_UDW */ - REG(0x140), /* [0x14] BB_ADDR */ - REG(0x110), /* [0x16] BB_STATE */ - REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */ - - NOP(12), /* [0x00] */ - - 0 -}; - -#undef REG16 -#undef REG -#undef LRI -#undef NOP - -static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) -{ - if (class == XE_ENGINE_CLASS_RENDER) { - if (GRAPHICS_VER(xe) >= 20) - return xe2_rcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1270) 
- return mtl_rcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1255) - return dg2_rcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1250) - return xehp_rcs_offsets; - else - return gen12_rcs_offsets; - } else if (class == XE_ENGINE_CLASS_COPY) { - if (GRAPHICS_VER(xe) >= 20) - return xe2_bcs_offsets; - else - return gen12_xcs_offsets; - } else { - if (GRAPHICS_VER(xe) >= 20) - return xe2_xcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1255) - return dg2_xcs_offsets; - else - return gen12_xcs_offsets; - } -} - -static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) -{ - regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - - if (xe_gt_has_indirect_ring_state(hwe->gt)) - regs[CTX_CONTEXT_CONTROL] |= - _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); - - /* TODO: Timestamp */ -} - -static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) -{ - struct xe_memirq *memirq = >_to_tile(hwe->gt)->sriov.vf.memirq; - struct xe_device *xe = gt_to_xe(hwe->gt); - - if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe)) - return; - - regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM | - MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT; - regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; - regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); - - regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | - MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; - regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; - regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq); - regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; - regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq); -} - -static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) -{ - struct xe_device *xe = gt_to_xe(hwe->gt); - - if (GRAPHICS_VERx100(xe) >= 1250) - return 0x70; - else - return 0x60; -} - -static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) -{ - int x; - - x = 
lrc_ring_mi_mode(hwe); - regs[x + 1] &= ~STOP_RING; - regs[x + 1] |= STOP_RING << 16; -} - -static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc) -{ - return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE; -} - -static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) -{ - return 0; -} - -u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) -{ - return lrc->ring.size; -} - -/* Make the magic macros work */ -#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset -#define __xe_lrc_regs_offset xe_lrc_regs_offset - -#define LRC_SEQNO_PPHWSP_OFFSET 512 -#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) -#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) -#define LRC_PARALLEL_PPHWSP_OFFSET 2048 -#define LRC_PPHWSP_SIZE SZ_4K - -u32 xe_lrc_regs_offset(struct xe_lrc *lrc) -{ - return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; -} - -static size_t lrc_reg_size(struct xe_device *xe) -{ - if (GRAPHICS_VERx100(xe) >= 1250) - return 96 * sizeof(u32); - else - return 80 * sizeof(u32); -} - -size_t xe_lrc_skip_size(struct xe_device *xe) -{ - return LRC_PPHWSP_SIZE + lrc_reg_size(xe); -} - -static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) -{ - /* The seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET; -} - -static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) -{ - /* The start seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; -} - -static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) -{ - /* The start seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; -} - -static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) -{ - /* The parallel is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; -} - -static u32 
__xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) -{ - return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); -} - -static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) -{ - /* Indirect ring state page is at the very end of LRC */ - return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; -} - -#define DECL_MAP_ADDR_HELPERS(elem) \ -static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ -{ \ - struct iosys_map map = lrc->bo->vmap; \ -\ - xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \ - iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ - return map; \ -} \ -static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ -{ \ - return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ -} \ - -DECL_MAP_ADDR_HELPERS(ring) -DECL_MAP_ADDR_HELPERS(pphwsp) -DECL_MAP_ADDR_HELPERS(seqno) -DECL_MAP_ADDR_HELPERS(regs) -DECL_MAP_ADDR_HELPERS(start_seqno) -DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) -DECL_MAP_ADDR_HELPERS(ctx_timestamp) -DECL_MAP_ADDR_HELPERS(parallel) -DECL_MAP_ADDR_HELPERS(indirect_ring) - -#undef DECL_MAP_ADDR_HELPERS - -/** - * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address - * @lrc: Pointer to the lrc. - * - * Returns: ctx timestamp GGTT address - */ -u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_ctx_timestamp_ggtt_addr(lrc); -} - -/** - * xe_lrc_ctx_timestamp() - Read ctx timestamp value - * @lrc: Pointer to the lrc. - * - * Returns: ctx timestamp value - */ -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_ctx_timestamp_map(lrc); - return xe_map_read32(xe, &map); -} - -/** - * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address - * @lrc: Pointer to the lrc. 
- * - * Returns: ctx timestamp job GGTT address - */ -u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); -} - -/** - * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value - * @lrc: Pointer to the lrc. - * - * Returns: ctx timestamp job value - */ -u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_ctx_job_timestamp_map(lrc); - return xe_map_read32(xe, &map); -} - -u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_pphwsp_ggtt_addr(lrc); -} - -u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc) -{ - if (!xe_lrc_has_indirect_ring_state(lrc)) - return 0; - - return __xe_lrc_indirect_ring_ggtt_addr(lrc); -} - -static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_indirect_ring_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - return xe_map_read32(xe, &map); -} - -static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc, - int reg_nr, u32 val) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_indirect_ring_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - xe_map_write32(xe, &map, val); -} - -u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_regs_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - return xe_map_read32(xe, &map); -} - -void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_regs_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - xe_map_write32(xe, &map, val); -} - -static void *empty_lrc_data(struct xe_hw_engine *hwe) -{ - struct xe_gt *gt = hwe->gt; - void *data; - u32 *regs; - - data = kzalloc(xe_gt_lrc_size(gt, hwe->class), 
GFP_KERNEL); - if (!data) - return NULL; - - /* 1st page: Per-Process of HW status Page */ - regs = data + LRC_PPHWSP_SIZE; - set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe); - set_context_control(regs, hwe); - set_memory_based_intr(regs, hwe); - reset_stop_ring(regs, hwe); - if (xe_gt_has_indirect_ring_state(gt)) { - regs = data + xe_gt_lrc_size(gt, hwe->class) - - LRC_INDIRECT_RING_STATE_SIZE; - set_offsets(regs, xe2_indirect_ring_state_offsets, hwe); - } - - return data; -} - -static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) -{ - u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); - - xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); - xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); -} - -static void xe_lrc_finish(struct xe_lrc *lrc) -{ - xe_hw_fence_ctx_finish(&lrc->fence_ctx); - xe_bo_lock(lrc->bo, false); - xe_bo_unpin(lrc->bo); - xe_bo_unlock(lrc->bo); - xe_bo_put(lrc->bo); -} - -#define PVC_CTX_ASID (0x2e + 1) -#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) - -static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_vm *vm, u32 ring_size) -{ - struct xe_gt *gt = hwe->gt; - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = gt_to_xe(gt); - struct iosys_map map; - void *init_data = NULL; - u32 arb_enable; - u32 lrc_size; - int err; - - kref_init(&lrc->refcount); - lrc->flags = 0; - lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); - if (xe_gt_has_indirect_ring_state(gt)) - lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; - - /* - * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address - * via VM bind calls. 
- */ - lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); - if (IS_ERR(lrc->bo)) - return PTR_ERR(lrc->bo); - - lrc->size = lrc_size; - lrc->tile = gt_to_tile(hwe->gt); - lrc->ring.size = ring_size; - lrc->ring.tail = 0; - lrc->ctx_timestamp = 0; - - xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, - hwe->fence_irq, hwe->name); - - if (!gt->default_lrc[hwe->class]) { - init_data = empty_lrc_data(hwe); - if (!init_data) { - err = -ENOMEM; - goto err_lrc_finish; - } - } - - /* - * Init Per-Process of HW status Page, LRC / context state to known - * values - */ - map = __xe_lrc_pphwsp_map(lrc); - if (!init_data) { - xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ - xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, - gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); - } else { - xe_map_memcpy_to(xe, &map, 0, init_data, - xe_gt_lrc_size(gt, hwe->class)); - kfree(init_data); - } - - if (vm) { - xe_lrc_set_ppgtt(lrc, vm); - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, lrc->bo); - } - - if (xe_gt_has_indirect_ring_state(gt)) { - xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, - __xe_lrc_indirect_ring_ggtt_addr(lrc)); - - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, - __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL, - RING_CTL_SIZE(lrc->ring.size) | RING_VALID); - } else { - xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); - xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, - RING_CTL_SIZE(lrc->ring.size) | 
RING_VALID); - } - - xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); - - if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); - - lrc->desc = LRC_VALID; - lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); - /* TODO: Priority */ - - /* While this appears to have something about privileged batches or - * some such, it really just means PPGTT mode. - */ - if (vm) - lrc->desc |= LRC_PRIVILEGE; - - if (GRAPHICS_VERx100(xe) < 1250) { - lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance); - lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class); - } - - arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; - xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable)); - - map = __xe_lrc_seqno_map(lrc); - xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - - map = __xe_lrc_start_seqno_map(lrc); - xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - - return 0; - -err_lrc_finish: - xe_lrc_finish(lrc); - return err; -} - -/** - * xe_lrc_create - Create a LRC - * @hwe: Hardware Engine - * @vm: The VM (address space) - * @ring_size: LRC ring size - * - * Allocate and initialize the Logical Ring Context (LRC). - * - * Return pointer to created LRC upon success and an error pointer - * upon failure. - */ -struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size) -{ - struct xe_lrc *lrc; - int err; - - lrc = kzalloc(sizeof(*lrc), GFP_KERNEL); - if (!lrc) - return ERR_PTR(-ENOMEM); - - err = xe_lrc_init(lrc, hwe, vm, ring_size); - if (err) { - kfree(lrc); - return ERR_PTR(err); - } - - return lrc; -} - -/** - * xe_lrc_destroy - Destroy the LRC - * @ref: reference to LRC - * - * Called when ref == 0, release resources held by the Logical Ring Context - * (LRC) and free the LRC memory. 
- */ -void xe_lrc_destroy(struct kref *ref) -{ - struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount); - - xe_lrc_finish(lrc); - kfree(lrc); -} - -void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail); - else - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail); -} - -u32 xe_lrc_ring_tail(struct xe_lrc *lrc) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR; - else - return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; -} - -void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head); - else - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); -} - -u32 xe_lrc_ring_head(struct xe_lrc *lrc) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR; - else - return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; -} - -u32 xe_lrc_ring_space(struct xe_lrc *lrc) -{ - const u32 head = xe_lrc_ring_head(lrc); - const u32 tail = lrc->ring.tail; - const u32 size = lrc->ring.size; - - return ((head - tail - 1) & (size - 1)) + 1; -} - -static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, - const void *data, size_t size) -{ - struct xe_device *xe = lrc_to_xe(lrc); - - iosys_map_incr(&ring, lrc->ring.tail); - xe_map_memcpy_to(xe, &ring, 0, data, size); - lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1); -} - -void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map ring; - u32 rhs; - size_t aligned_size; - - xe_assert(xe, IS_ALIGNED(size, 4)); - aligned_size = ALIGN(size, 8); - - ring = __xe_lrc_ring_map(lrc); - - xe_assert(xe, lrc->ring.tail < lrc->ring.size); - rhs = lrc->ring.size - 
lrc->ring.tail; - if (size > rhs) { - __xe_lrc_write_ring(lrc, ring, data, rhs); - __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs); - } else { - __xe_lrc_write_ring(lrc, ring, data, size); - } - - if (aligned_size > size) { - u32 noop = MI_NOOP; - - __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop)); - } -} - -u64 xe_lrc_descriptor(struct xe_lrc *lrc) -{ - return lrc->desc | xe_lrc_ggtt_addr(lrc); -} - -u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_seqno_ggtt_addr(lrc); -} - -/** - * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence. - * - * Allocate but don't initialize an lrc seqno fence. - * - * Return: Pointer to the allocated fence or - * negative error pointer on error. - */ -struct dma_fence *xe_lrc_alloc_seqno_fence(void) -{ - return xe_hw_fence_alloc(); -} - -/** - * xe_lrc_free_seqno_fence() - Free an lrc seqno fence. - * @fence: Pointer to the fence to free. - * - * Frees an lrc seqno fence that hasn't yet been - * initialized. - */ -void xe_lrc_free_seqno_fence(struct dma_fence *fence) -{ - xe_hw_fence_free(fence); -} - -/** - * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence. - * @lrc: Pointer to the lrc. - * @fence: Pointer to the fence to initialize. - * - * Initializes a pre-allocated lrc seqno fence. - * After initialization, the fence is subject to normal - * dma-fence refcounting. 
- */ -void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence) -{ - xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc)); -} - -s32 xe_lrc_seqno(struct xe_lrc *lrc) -{ - struct iosys_map map = __xe_lrc_seqno_map(lrc); - - return xe_map_read32(lrc_to_xe(lrc), &map); -} - -s32 xe_lrc_start_seqno(struct xe_lrc *lrc) -{ - struct iosys_map map = __xe_lrc_start_seqno_map(lrc); - - return xe_map_read32(lrc_to_xe(lrc), &map); -} - -u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_start_seqno_ggtt_addr(lrc); -} - -u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_parallel_ggtt_addr(lrc); -} - -struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) -{ - return __xe_lrc_parallel_map(lrc); -} - -static int instr_dw(u32 cmd_header) -{ - /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ - if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) == - GFXPIPE_SINGLE_DW_CMD(0, 0)) - return 1; - - /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */ - if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST) - return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2; - - /* Most instructions have the # of dwords (minus 2) in 7:0 */ - return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2; -} - -static int dump_mi_command(struct drm_printer *p, - struct xe_gt *gt, - u32 *dw, - int remaining_dw) -{ - u32 inst_header = *dw; - u32 numdw = instr_dw(inst_header); - u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header); - int num_noop; - - /* First check for commands that don't have/use a '# DW' field */ - switch (inst_header & MI_OPCODE) { - case MI_NOOP: - num_noop = 1; - while (num_noop < remaining_dw && - (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) - num_noop++; - drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop); - return num_noop; - - case MI_TOPOLOGY_FILTER: - drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header); - return 1; - - case 
MI_BATCH_BUFFER_END: - drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header); - /* Return 'remaining_dw' to consume the rest of the LRC */ - return remaining_dw; - } - - /* - * Any remaining commands include a # of dwords. We should make sure - * it doesn't exceed the remaining size of the LRC. - */ - if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) - numdw = remaining_dw; - - switch (inst_header & MI_OPCODE) { - case MI_LOAD_REGISTER_IMM: - drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", - inst_header, (numdw - 1) / 2); - for (int i = 1; i < numdw; i += 2) - drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]); - return numdw; - - case MI_LOAD_REGISTER_MEM & MI_OPCODE: - drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", - inst_header, - dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "", - dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : ""); - if (numdw == 4) - drm_printf(p, " - %#6x = %#010llx\n", - dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); - else - drm_printf(p, " - %*ph (%s)\n", - (int)sizeof(u32) * (numdw - 1), dw + 1, - numdw < 4 ? "truncated" : "malformed"); - return numdw; - - case MI_FORCE_WAKEUP: - drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); - return numdw; - - default: - drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", - inst_header, opcode, numdw); - return numdw; - } -} - -static int dump_gfxpipe_command(struct drm_printer *p, - struct xe_gt *gt, - u32 *dw, - int remaining_dw) -{ - u32 numdw = instr_dw(*dw); - u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); - u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); - u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); - - /* - * Make sure we haven't mis-parsed a number of dwords that exceeds the - * remaining size of the LRC. 
- */ - if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) - numdw = remaining_dw; - - switch (*dw & GFXPIPE_MATCH_MASK) { -#define MATCH(cmd) \ - case cmd: \ - drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ - return numdw -#define MATCH3D(cmd) \ - case CMD_##cmd: \ - drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ - return numdw - - MATCH(STATE_BASE_ADDRESS); - MATCH(STATE_SIP); - MATCH(GPGPU_CSR_BASE_ADDRESS); - MATCH(STATE_COMPUTE_MODE); - MATCH3D(3DSTATE_BTD); - MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS); - MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS); - - MATCH3D(3DSTATE_VF_STATISTICS); - - MATCH(PIPELINE_SELECT); - - MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); - MATCH3D(3DSTATE_CLEAR_PARAMS); - MATCH3D(3DSTATE_DEPTH_BUFFER); - MATCH3D(3DSTATE_STENCIL_BUFFER); - MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); - MATCH3D(3DSTATE_VERTEX_BUFFERS); - MATCH3D(3DSTATE_VERTEX_ELEMENTS); - MATCH3D(3DSTATE_INDEX_BUFFER); - MATCH3D(3DSTATE_VF); - MATCH3D(3DSTATE_MULTISAMPLE); - MATCH3D(3DSTATE_CC_STATE_POINTERS); - MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); - MATCH3D(3DSTATE_VS); - MATCH3D(3DSTATE_GS); - MATCH3D(3DSTATE_CLIP); - MATCH3D(3DSTATE_SF); - MATCH3D(3DSTATE_WM); - MATCH3D(3DSTATE_CONSTANT_VS); - MATCH3D(3DSTATE_CONSTANT_GS); - MATCH3D(3DSTATE_CONSTANT_PS); - MATCH3D(3DSTATE_SAMPLE_MASK); - MATCH3D(3DSTATE_CONSTANT_HS); - MATCH3D(3DSTATE_CONSTANT_DS); - MATCH3D(3DSTATE_HS); - MATCH3D(3DSTATE_TE); - MATCH3D(3DSTATE_DS); - MATCH3D(3DSTATE_STREAMOUT); - MATCH3D(3DSTATE_SBE); - MATCH3D(3DSTATE_PS); - MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); - MATCH3D(3DSTATE_CPS_POINTERS); - MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); - MATCH3D(3DSTATE_BLEND_STATE_POINTERS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); - 
MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); - MATCH3D(3DSTATE_VF_INSTANCING); - MATCH3D(3DSTATE_VF_SGVS); - MATCH3D(3DSTATE_VF_TOPOLOGY); - MATCH3D(3DSTATE_WM_CHROMAKEY); - MATCH3D(3DSTATE_PS_BLEND); - MATCH3D(3DSTATE_WM_DEPTH_STENCIL); - MATCH3D(3DSTATE_PS_EXTRA); - MATCH3D(3DSTATE_RASTER); - MATCH3D(3DSTATE_SBE_SWIZ); - MATCH3D(3DSTATE_WM_HZ_OP); - MATCH3D(3DSTATE_VF_COMPONENT_PACKING); - MATCH3D(3DSTATE_VF_SGVS_2); - MATCH3D(3DSTATE_VFG); - MATCH3D(3DSTATE_URB_ALLOC_VS); - MATCH3D(3DSTATE_URB_ALLOC_HS); - MATCH3D(3DSTATE_URB_ALLOC_DS); - MATCH3D(3DSTATE_URB_ALLOC_GS); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_0); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_1); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_2); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_3); - MATCH3D(3DSTATE_PRIMITIVE_REPLICATION); - MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO); - MATCH3D(3DSTATE_AMFS); - MATCH3D(3DSTATE_DEPTH_BOUNDS); - MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); - MATCH3D(3DSTATE_CONSTANT_TS_POINTER); - MATCH3D(3DSTATE_MESH_CONTROL); - MATCH3D(3DSTATE_MESH_DISTRIB); - MATCH3D(3DSTATE_TASK_REDISTRIB); - MATCH3D(3DSTATE_MESH_SHADER); - MATCH3D(3DSTATE_MESH_SHADER_DATA); - MATCH3D(3DSTATE_TASK_CONTROL); - MATCH3D(3DSTATE_TASK_SHADER); - MATCH3D(3DSTATE_TASK_SHADER_DATA); - MATCH3D(3DSTATE_URB_ALLOC_MESH); - MATCH3D(3DSTATE_URB_ALLOC_TASK); - MATCH3D(3DSTATE_CLIP_MESH); - MATCH3D(3DSTATE_SBE_MESH); - MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); - - MATCH3D(3DSTATE_DRAWING_RECTANGLE); - MATCH3D(3DSTATE_CHROMA_KEY); - MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); - MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); - MATCH3D(3DSTATE_LINE_STIPPLE); - MATCH3D(3DSTATE_AA_LINE_PARAMETERS); - MATCH3D(3DSTATE_MONOFILTER_SIZE); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); - 
MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); - MATCH3D(3DSTATE_SO_DECL_LIST); - MATCH3D(3DSTATE_SO_BUFFER); - MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); - MATCH3D(3DSTATE_SAMPLE_PATTERN); - MATCH3D(3DSTATE_3D_MODE); - MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); - MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); - MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); - - default: - drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", - *dw, pipeline, opcode, subopcode, numdw); - return numdw; - } -} - -static int dump_gfx_state_command(struct drm_printer *p, - struct xe_gt *gt, - u32 *dw, - int remaining_dw) -{ - u32 numdw = instr_dw(*dw); - u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw); - - /* - * Make sure we haven't mis-parsed a number of dwords that exceeds the - * remaining size of the LRC. - */ - if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) - numdw = remaining_dw; - - switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) { - MATCH(STATE_WRITE_INLINE); - - default: - drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", - *dw, opcode, numdw); - return numdw; - } -} - -void xe_lrc_dump_default(struct drm_printer *p, - struct xe_gt *gt, - enum xe_engine_class hwe_class) -{ - u32 *dw; - int remaining_dw, num_dw; - - if (!gt->default_lrc[hwe_class]) { - drm_printf(p, "No default LRC for class %d\n", hwe_class); - return; - } - - /* - * Skip the beginning of the LRC since it contains the per-process - * hardware status page. 
- */ - dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; - remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; - - while (remaining_dw > 0) { - if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { - num_dw = dump_mi_command(p, gt, dw, remaining_dw); - } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { - num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); - } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) { - num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw); - } else { - num_dw = min(instr_dw(*dw), remaining_dw); - drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", - *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), - num_dw); - } - - dw += num_dw; - remaining_dw -= num_dw; - } -} - -struct instr_state { - u32 instr; - u16 num_dw; -}; - -static const struct instr_state xe_hpg_svg_state[] = { - { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 }, - { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 }, - { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 }, - { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 }, - { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 }, - { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 }, - { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 }, - { .instr = 
CMD_3DSTATE_SF, .num_dw = 4 }, - { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 }, - { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 }, - { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 }, - { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 }, - { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 }, - { .instr = CMD_3DSTATE_HS, .num_dw = 9 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 }, - { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 }, - { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 }, - { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 }, - { .instr = CMD_3DSTATE_TE, .num_dw = 5 }, - { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 }, - { .instr = CMD_3DSTATE_DS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_GS, .num_dw = 10 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 }, - { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 }, - { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 }, - { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 }, - { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, -}; - -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) -{ - struct xe_gt *gt = q->hwe->gt; - struct xe_device *xe = gt_to_xe(gt); - const struct instr_state *state_table = NULL; - int state_table_size = 0; - - /* - * Wa_14019789679 - * - * If the driver doesn't explicitly emit the 
SVG instructions while - * setting up the default LRC, the context switch will write 0's - * (noops) into the LRC memory rather than the expected instruction - * headers. Application contexts start out as a copy of the default - * LRC, and if they also do not emit specific settings for some SVG - * state, then on context restore they'll unintentionally inherit - * whatever state setting the previous context had programmed into the - * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will - * prevent the hardware from resetting that state back to any specific - * value). - * - * The official workaround only requires emitting 3DSTATE_MESH_CONTROL - * since that's a specific state setting that can easily cause GPU - * hangs if unintentionally inherited. However to be safe we'll - * continue to emit all of the SVG state since it's best not to leak - * any of the state between contexts, even if that leakage is harmless. - */ - if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { - state_table = xe_hpg_svg_state; - state_table_size = ARRAY_SIZE(xe_hpg_svg_state); - } - - if (!state_table) { - xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", - GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); - return; - } - - for (int i = 0; i < state_table_size; i++) { - u32 instr = state_table[i].instr; - u16 num_dw = state_table[i].num_dw; - bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW); - - xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE); - xe_gt_assert(gt, num_dw != 0); - xe_gt_assert(gt, is_single_dw ^ (num_dw > 1)); - - /* - * Xe2's SVG context is the same as the one on DG2 / MTL - * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has - * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined). - * Just make the replacement here rather than defining a - * whole separate table for the single trivial change. 
- */ - if (GRAPHICS_VER(xe) >= 20 && - instr == CMD_3DSTATE_DRAWING_RECTANGLE) - instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; - - bb->cs[bb->len] = instr; - if (!is_single_dw) - bb->cs[bb->len] |= (num_dw - 2); - - bb->len += num_dw; - } -} - -struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) -{ - struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT); - - if (!snapshot) - return NULL; - -<<<<<<< - if (lrc->bo && lrc->bo->vm) -======= - if (lrc->bo->vm) ->>>>>>> - xe_vm_get(lrc->bo->vm); - - snapshot->context_desc = xe_lrc_ggtt_addr(lrc); - snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); - snapshot->head = xe_lrc_ring_head(lrc); - snapshot->tail.internal = lrc->ring.tail; - snapshot->tail.memory = xe_lrc_ring_tail(lrc); - snapshot->start_seqno = xe_lrc_start_seqno(lrc); - snapshot->seqno = xe_lrc_seqno(lrc); - snapshot->lrc_bo = xe_bo_get(lrc->bo); - snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; - snapshot->lrc_snapshot = NULL; - snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); - snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); - return snapshot; -} - -void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) -{ - struct xe_bo *bo; - struct xe_vm *vm; - struct iosys_map src; - - if (!snapshot) - return; - - bo = snapshot->lrc_bo; - vm = bo->vm; - snapshot->lrc_bo = NULL; - - snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); - if (!snapshot->lrc_snapshot) - goto put_bo; - - xe_bo_lock(bo, false); - if (!ttm_bo_vmap(&bo->ttm, &src)) { - xe_map_memcpy_from(xe_bo_device(bo), - snapshot->lrc_snapshot, &src, snapshot->lrc_offset, - snapshot->lrc_size); - ttm_bo_vunmap(&bo->ttm, &src); - } else { - kvfree(snapshot->lrc_snapshot); - snapshot->lrc_snapshot = NULL; - } - xe_bo_unlock(bo); -put_bo: - xe_bo_put(bo); - if (vm) - xe_vm_put(vm); -} - -void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, 
struct drm_printer *p) -{ - unsigned long i; - - if (!snapshot) - return; - - drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); - drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", - snapshot->indirect_context_desc); - drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); - drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", - snapshot->tail.internal, snapshot->tail.memory); - drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); - drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); - drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); - drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp); - - if (!snapshot->lrc_snapshot) - return; - - drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE); - drm_puts(p, "\t[HWSP].data: "); - for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) { - u32 *val = snapshot->lrc_snapshot + i; - char dumped[ASCII85_BUFSZ]; - - drm_puts(p, ascii85_encode(*val, dumped)); - } - - drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE); - drm_puts(p, "\t[HWCTX].data: "); - for (; i < snapshot->lrc_size; i += sizeof(u32)) { - u32 *val = snapshot->lrc_snapshot + i; - char dumped[ASCII85_BUFSZ]; - - drm_puts(p, ascii85_encode(*val, dumped)); - } - drm_puts(p, "\n"); -} - -void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) -{ - if (!snapshot) - return; - - kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) { - struct xe_vm *vm; - - vm = snapshot->lrc_bo->vm; - xe_bo_put(snapshot->lrc_bo); - if (vm) - xe_vm_put(vm); - } - kfree(snapshot); -} - -/** - * xe_lrc_update_timestamp() - Update ctx timestamp - * @lrc: Pointer to the lrc. - * @old_ts: Old timestamp value - * - * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and - * update saved value. 
- * - * Returns: New ctx timestamp value - */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) -{ - *old_ts = lrc->ctx_timestamp; - - lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); - - return lrc->ctx_timestamp; -} diff --git a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.2 b/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.2 deleted file mode 100644 index bee934c9371f..000000000000 --- a/rr-cache/33631052a6d9a474bd2e99e29c6698270b1c963c/preimage.2 +++ /dev/null @@ -1,1784 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include "xe_lrc.h" - -#include <generated/xe_wa_oob.h> - -#include <linux/ascii85.h> - -#include "instructions/xe_mi_commands.h" -#include "instructions/xe_gfxpipe_commands.h" -#include "instructions/xe_gfx_state_commands.h" -#include "regs/xe_engine_regs.h" -#include "regs/xe_lrc_layout.h" -#include "xe_bb.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_exec_queue_types.h" -#include "xe_gt.h" -#include "xe_gt_printk.h" -#include "xe_hw_fence.h" -#include "xe_map.h" -#include "xe_memirq.h" -#include "xe_sriov.h" -#include "xe_vm.h" -#include "xe_wa.h" - -#define LRC_VALID BIT_ULL(0) -#define LRC_PRIVILEGE BIT_ULL(8) -#define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3) -#define LRC_LEGACY_64B_CONTEXT 3 - -#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61) -#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) - -#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K - -struct xe_lrc_snapshot { - struct xe_bo *lrc_bo; - void *lrc_snapshot; - unsigned long lrc_size, lrc_offset; - - u32 context_desc; - u32 indirect_context_desc; - u32 head; - struct { - u32 internal; - u32 memory; - } tail; - u32 start_seqno; - u32 seqno; - u32 ctx_timestamp; - u32 ctx_job_timestamp; -}; - -static struct xe_device * -lrc_to_xe(struct xe_lrc *lrc) -{ - return gt_to_xe(lrc->fence_ctx.gt); -} - -size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) -{ - struct 
xe_device *xe = gt_to_xe(gt); - size_t size; - - switch (class) { - case XE_ENGINE_CLASS_RENDER: - if (GRAPHICS_VER(xe) >= 20) - size = 4 * SZ_4K; - else - size = 14 * SZ_4K; - break; - case XE_ENGINE_CLASS_COMPUTE: - /* 14 pages since graphics_ver == 11 */ - if (GRAPHICS_VER(xe) >= 20) - size = 3 * SZ_4K; - else - size = 14 * SZ_4K; - break; - default: - WARN(1, "Unknown engine class: %d", class); - fallthrough; - case XE_ENGINE_CLASS_COPY: - case XE_ENGINE_CLASS_VIDEO_DECODE: - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - case XE_ENGINE_CLASS_OTHER: - size = 2 * SZ_4K; - } - - /* Add indirect ring state page */ - if (xe_gt_has_indirect_ring_state(gt)) - size += LRC_INDIRECT_RING_STATE_SIZE; - - return size; -} - -/* - * The per-platform tables are u8-encoded in @data. Decode @data and set the - * addresses' offset and commands in @regs. The following encoding is used - * for each byte. There are 2 steps: decoding commands and decoding addresses. - * - * Commands: - * [7]: create NOPs - number of NOPs are set in lower bits - * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set - * MI_LRI_FORCE_POSTED - * [5:0]: Number of NOPs or registers to set values to in case of - * MI_LOAD_REGISTER_IMM - * - * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count" - * number of registers. They are set by using the REG/REG16 macros: the former - * is used for offsets smaller than 0x200 while the latter is for values bigger - * than that. Those macros already set all the bits documented below correctly: - * - * [7]: When a register offset needs more than 6 bits, use additional bytes, to - * follow, for the lower bits - * [6:0]: Register offset, without considering the engine base. - * - * This function only tweaks the commands and register offsets. Values are not - * filled out. 
- */ -static void set_offsets(u32 *regs, - const u8 *data, - const struct xe_hw_engine *hwe) -#define NOP(x) (BIT(7) | (x)) -#define LRI(count, flags) ((flags) << 6 | (count) | \ - BUILD_BUG_ON_ZERO(count >= BIT(6))) -#define POSTED BIT(0) -#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200)) -#define REG16(x) \ - (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ - (((x) >> 2) & 0x7f) -{ - const u32 base = hwe->mmio_base; - - while (*data) { - u8 count, flags; - - if (*data & BIT(7)) { /* skip */ - count = *data++ & ~BIT(7); - regs += count; - continue; - } - - count = *data & 0x3f; - flags = *data >> 6; - data++; - - *regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); - if (flags & POSTED) - *regs |= MI_LRI_FORCE_POSTED; - *regs |= MI_LRI_LRM_CS_MMIO; - regs++; - - xe_gt_assert(hwe->gt, count); - do { - u32 offset = 0; - u8 v; - - do { - v = *data++; - offset <<= 7; - offset |= v & ~BIT(7); - } while (v & BIT(7)); - - regs[0] = base + (offset << 2); - regs += 2; - } while (--count); - } - - *regs = MI_BATCH_BUFFER_END | BIT(0); -} - -static const u8 gen12_xcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - 0 -}; - -static const u8 dg2_xcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - 0 -}; - -static const u8 
gen12_rcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - NOP(3 + 9 + 1), - - LRI(51, POSTED), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG16(0x588), - REG(0x028), - REG(0x09c), - REG(0x0c0), - REG(0x178), - REG(0x17c), - REG16(0x358), - REG(0x170), - REG(0x150), - REG(0x154), - REG(0x158), - REG16(0x41c), - REG16(0x600), - REG16(0x604), - REG16(0x608), - REG16(0x60c), - REG16(0x610), - REG16(0x614), - REG16(0x618), - REG16(0x61c), - REG16(0x620), - REG16(0x624), - REG16(0x628), - REG16(0x62c), - REG16(0x630), - REG16(0x634), - REG16(0x638), - REG16(0x63c), - REG16(0x640), - REG16(0x644), - REG16(0x648), - REG16(0x64c), - REG16(0x650), - REG16(0x654), - REG16(0x658), - REG16(0x65c), - REG16(0x660), - REG16(0x664), - REG16(0x668), - REG16(0x66c), - REG16(0x670), - REG16(0x674), - REG16(0x678), - REG16(0x67c), - REG(0x068), - REG(0x084), - NOP(1), - - 0 -}; - -static const u8 xehp_rcs_offsets[] = { - NOP(1), - LRI(13, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - - NOP(5), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - 0 -}; - -static const u8 dg2_rcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - 
REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - LRI(3, POSTED), - REG(0x1b0), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - 0 -}; - -static const u8 mtl_rcs_offsets[] = { - NOP(1), - LRI(15, POSTED), - REG16(0x244), - REG(0x034), - REG(0x030), - REG(0x038), - REG(0x03c), - REG(0x168), - REG(0x140), - REG(0x110), - REG(0x1c0), - REG(0x1c4), - REG(0x1c8), - REG(0x180), - REG16(0x2b4), - REG(0x120), - REG(0x124), - - NOP(1), - LRI(9, POSTED), - REG16(0x3a8), - REG16(0x28c), - REG16(0x288), - REG16(0x284), - REG16(0x280), - REG16(0x27c), - REG16(0x278), - REG16(0x274), - REG16(0x270), - - NOP(2), - LRI(2, POSTED), - REG16(0x5a8), - REG16(0x5ac), - - NOP(6), - LRI(1, 0), - REG(0x0c8), - - 0 -}; - -#define XE2_CTX_COMMON \ - NOP(1), /* [0x00] */ \ - LRI(15, POSTED), /* [0x01] */ \ - REG16(0x244), /* [0x02] CTXT_SR_CTL */ \ - REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \ - REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \ - REG(0x038), /* [0x08] RING_BUFFER_START */ \ - REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \ - REG(0x168), /* [0x0c] BB_ADDR_UDW */ \ - REG(0x140), /* [0x0e] BB_ADDR */ \ - REG(0x110), /* [0x10] BB_STATE */ \ - REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \ - REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \ - REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \ - REG(0x180), /* [0x18] CCID */ \ - REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \ - REG(0x120), /* [0x1c] PRT_BB_STATE */ \ - REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \ - \ - NOP(1), /* [0x20] */ \ - LRI(9, POSTED), /* [0x21] */ \ - REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \ - REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \ - REG(0x108), /* [0x26] 
INDIRECT_RING_STATE */ \ - REG16(0x284), /* [0x28] dummy reg */ \ - REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \ - REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \ - REG16(0x278), /* [0x2e] CS_CTX_ASID */ \ - REG16(0x274), /* [0x30] PTBP_UDW */ \ - REG16(0x270) /* [0x32] PTBP_LDW */ - -static const u8 xe2_rcs_offsets[] = { - XE2_CTX_COMMON, - - NOP(2), /* [0x34] */ - LRI(2, POSTED), /* [0x36] */ - REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */ - REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */ - - NOP(6), /* [0x41] */ - LRI(1, 0), /* [0x47] */ - REG(0x0c8), /* [0x48] R_PWR_CLK_STATE */ - - 0 -}; - -static const u8 xe2_bcs_offsets[] = { - XE2_CTX_COMMON, - - NOP(4 + 8 + 1), /* [0x34] */ - LRI(2, POSTED), /* [0x41] */ - REG16(0x200), /* [0x42] BCS_SWCTRL */ - REG16(0x204), /* [0x44] BLIT_CCTL */ - - 0 -}; - -static const u8 xe2_xcs_offsets[] = { - XE2_CTX_COMMON, - - 0 -}; - -static const u8 xe2_indirect_ring_state_offsets[] = { - NOP(1), /* [0x00] */ - LRI(5, POSTED), /* [0x01] */ - REG(0x034), /* [0x02] RING_BUFFER_HEAD */ - REG(0x030), /* [0x04] RING_BUFFER_TAIL */ - REG(0x038), /* [0x06] RING_BUFFER_START */ - REG(0x048), /* [0x08] RING_BUFFER_START_UDW */ - REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ - - NOP(5), /* [0x0c] */ - LRI(9, POSTED), /* [0x11] */ - REG(0x168), /* [0x12] BB_ADDR_UDW */ - REG(0x140), /* [0x14] BB_ADDR */ - REG(0x110), /* [0x16] BB_STATE */ - REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x24] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x26] BB_STACK_WRITE_PORT */ - REG16(0x588), /* [0x28] BB_STACK_WRITE_PORT */ - - NOP(12), /* [0x00] */ - - 0 -}; - -#undef REG16 -#undef REG -#undef LRI -#undef NOP - -static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class) -{ - if (class == XE_ENGINE_CLASS_RENDER) { - if (GRAPHICS_VER(xe) >= 20) - return xe2_rcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1270) 
- return mtl_rcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1255) - return dg2_rcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1250) - return xehp_rcs_offsets; - else - return gen12_rcs_offsets; - } else if (class == XE_ENGINE_CLASS_COPY) { - if (GRAPHICS_VER(xe) >= 20) - return xe2_bcs_offsets; - else - return gen12_xcs_offsets; - } else { - if (GRAPHICS_VER(xe) >= 20) - return xe2_xcs_offsets; - else if (GRAPHICS_VERx100(xe) >= 1255) - return dg2_xcs_offsets; - else - return gen12_xcs_offsets; - } -} - -static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) -{ - regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - - if (xe_gt_has_indirect_ring_state(hwe->gt)) - regs[CTX_CONTEXT_CONTROL] |= - _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); - - /* TODO: Timestamp */ -} - -static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) -{ - struct xe_memirq *memirq = >_to_tile(hwe->gt)->sriov.vf.memirq; - struct xe_device *xe = gt_to_xe(hwe->gt); - - if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe)) - return; - - regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM | - MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT; - regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr; - regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq); - - regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | - MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED; - regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr; - regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq); - regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr; - regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq); -} - -static int lrc_ring_mi_mode(struct xe_hw_engine *hwe) -{ - struct xe_device *xe = gt_to_xe(hwe->gt); - - if (GRAPHICS_VERx100(xe) >= 1250) - return 0x70; - else - return 0x60; -} - -static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe) -{ - int x; - - x = 
lrc_ring_mi_mode(hwe); - regs[x + 1] &= ~STOP_RING; - regs[x + 1] |= STOP_RING << 16; -} - -static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc) -{ - return lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE; -} - -static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc) -{ - return 0; -} - -u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc) -{ - return lrc->ring.size; -} - -/* Make the magic macros work */ -#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset -#define __xe_lrc_regs_offset xe_lrc_regs_offset - -#define LRC_SEQNO_PPHWSP_OFFSET 512 -#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8) -#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8) -#define LRC_PARALLEL_PPHWSP_OFFSET 2048 -#define LRC_PPHWSP_SIZE SZ_4K - -u32 xe_lrc_regs_offset(struct xe_lrc *lrc) -{ - return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE; -} - -static size_t lrc_reg_size(struct xe_device *xe) -{ - if (GRAPHICS_VERx100(xe) >= 1250) - return 96 * sizeof(u32); - else - return 80 * sizeof(u32); -} - -size_t xe_lrc_skip_size(struct xe_device *xe) -{ - return LRC_PPHWSP_SIZE + lrc_reg_size(xe); -} - -static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc) -{ - /* The seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_SEQNO_PPHWSP_OFFSET; -} - -static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc) -{ - /* The start seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_START_SEQNO_PPHWSP_OFFSET; -} - -static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc) -{ - /* The start seqno is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET; -} - -static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc) -{ - /* The parallel is stored in the driver-defined portion of PPHWSP */ - return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET; -} - -static u32 
__xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc) -{ - return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32); -} - -static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) -{ - /* Indirect ring state page is at the very end of LRC */ - return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; -} - -#define DECL_MAP_ADDR_HELPERS(elem) \ -static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \ -{ \ - struct iosys_map map = lrc->bo->vmap; \ -\ - xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \ - iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \ - return map; \ -} \ -static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \ -{ \ - return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \ -} \ - -DECL_MAP_ADDR_HELPERS(ring) -DECL_MAP_ADDR_HELPERS(pphwsp) -DECL_MAP_ADDR_HELPERS(seqno) -DECL_MAP_ADDR_HELPERS(regs) -DECL_MAP_ADDR_HELPERS(start_seqno) -DECL_MAP_ADDR_HELPERS(ctx_job_timestamp) -DECL_MAP_ADDR_HELPERS(ctx_timestamp) -DECL_MAP_ADDR_HELPERS(parallel) -DECL_MAP_ADDR_HELPERS(indirect_ring) - -#undef DECL_MAP_ADDR_HELPERS - -/** - * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address - * @lrc: Pointer to the lrc. - * - * Returns: ctx timestamp GGTT address - */ -u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_ctx_timestamp_ggtt_addr(lrc); -} - -/** - * xe_lrc_ctx_timestamp() - Read ctx timestamp value - * @lrc: Pointer to the lrc. - * - * Returns: ctx timestamp value - */ -u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_ctx_timestamp_map(lrc); - return xe_map_read32(xe, &map); -} - -/** - * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address - * @lrc: Pointer to the lrc. 
- * - * Returns: ctx timestamp job GGTT address - */ -u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc); -} - -/** - * xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value - * @lrc: Pointer to the lrc. - * - * Returns: ctx timestamp job value - */ -u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_ctx_job_timestamp_map(lrc); - return xe_map_read32(xe, &map); -} - -u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_pphwsp_ggtt_addr(lrc); -} - -u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc) -{ - if (!xe_lrc_has_indirect_ring_state(lrc)) - return 0; - - return __xe_lrc_indirect_ring_ggtt_addr(lrc); -} - -static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_indirect_ring_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - return xe_map_read32(xe, &map); -} - -static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc, - int reg_nr, u32 val) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_indirect_ring_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - xe_map_write32(xe, &map, val); -} - -u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_regs_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - return xe_map_read32(xe, &map); -} - -void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map map; - - map = __xe_lrc_regs_map(lrc); - iosys_map_incr(&map, reg_nr * sizeof(u32)); - xe_map_write32(xe, &map, val); -} - -static void *empty_lrc_data(struct xe_hw_engine *hwe) -{ - struct xe_gt *gt = hwe->gt; - void *data; - u32 *regs; - - data = kzalloc(xe_gt_lrc_size(gt, hwe->class), 
GFP_KERNEL); - if (!data) - return NULL; - - /* 1st page: Per-Process of HW status Page */ - regs = data + LRC_PPHWSP_SIZE; - set_offsets(regs, reg_offsets(gt_to_xe(gt), hwe->class), hwe); - set_context_control(regs, hwe); - set_memory_based_intr(regs, hwe); - reset_stop_ring(regs, hwe); - if (xe_gt_has_indirect_ring_state(gt)) { - regs = data + xe_gt_lrc_size(gt, hwe->class) - - LRC_INDIRECT_RING_STATE_SIZE; - set_offsets(regs, xe2_indirect_ring_state_offsets, hwe); - } - - return data; -} - -static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm) -{ - u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile); - - xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc)); - xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc)); -} - -static void xe_lrc_finish(struct xe_lrc *lrc) -{ - xe_hw_fence_ctx_finish(&lrc->fence_ctx); - xe_bo_lock(lrc->bo, false); - xe_bo_unpin(lrc->bo); - xe_bo_unlock(lrc->bo); - xe_bo_put(lrc->bo); -} - -#define PVC_CTX_ASID (0x2e + 1) -#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) - -static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - struct xe_vm *vm, u32 ring_size) -{ - struct xe_gt *gt = hwe->gt; - struct xe_tile *tile = gt_to_tile(gt); - struct xe_device *xe = gt_to_xe(gt); - struct iosys_map map; - void *init_data = NULL; - u32 arb_enable; - u32 lrc_size; - int err; - - kref_init(&lrc->refcount); - lrc->flags = 0; - lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); - if (xe_gt_has_indirect_ring_state(gt)) - lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; - - /* - * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address - * via VM bind calls. 
- */ - lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE); - if (IS_ERR(lrc->bo)) - return PTR_ERR(lrc->bo); - - lrc->size = lrc_size; - lrc->tile = gt_to_tile(hwe->gt); - lrc->ring.size = ring_size; - lrc->ring.tail = 0; - lrc->ctx_timestamp = 0; - - xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, - hwe->fence_irq, hwe->name); - - if (!gt->default_lrc[hwe->class]) { - init_data = empty_lrc_data(hwe); - if (!init_data) { - err = -ENOMEM; - goto err_lrc_finish; - } - } - - /* - * Init Per-Process of HW status Page, LRC / context state to known - * values - */ - map = __xe_lrc_pphwsp_map(lrc); - if (!init_data) { - xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ - xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, - gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); - } else { - xe_map_memcpy_to(xe, &map, 0, init_data, - xe_gt_lrc_size(gt, hwe->class)); - kfree(init_data); - } - - if (vm) { - xe_lrc_set_ppgtt(lrc, vm); - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, lrc->bo); - } - - if (xe_gt_has_indirect_ring_state(gt)) { - xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE, - __xe_lrc_indirect_ring_ggtt_addr(lrc)); - - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START, - __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail); - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL, - RING_CTL_SIZE(lrc->ring.size) | RING_VALID); - } else { - xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc)); - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0); - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail); - xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL, - RING_CTL_SIZE(lrc->ring.size) | 
RING_VALID); - } - - xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0); - - if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); - - lrc->desc = LRC_VALID; - lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); - /* TODO: Priority */ - - /* While this appears to have something about privileged batches or - * some such, it really just means PPGTT mode. - */ - if (vm) - lrc->desc |= LRC_PRIVILEGE; - - if (GRAPHICS_VERx100(xe) < 1250) { - lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance); - lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class); - } - - arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE; - xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable)); - - map = __xe_lrc_seqno_map(lrc); - xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - - map = __xe_lrc_start_seqno_map(lrc); - xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1); - - return 0; - -err_lrc_finish: - xe_lrc_finish(lrc); - return err; -} - -/** - * xe_lrc_create - Create a LRC - * @hwe: Hardware Engine - * @vm: The VM (address space) - * @ring_size: LRC ring size - * - * Allocate and initialize the Logical Ring Context (LRC). - * - * Return pointer to created LRC upon success and an error pointer - * upon failure. - */ -struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm, - u32 ring_size) -{ - struct xe_lrc *lrc; - int err; - - lrc = kzalloc(sizeof(*lrc), GFP_KERNEL); - if (!lrc) - return ERR_PTR(-ENOMEM); - - err = xe_lrc_init(lrc, hwe, vm, ring_size); - if (err) { - kfree(lrc); - return ERR_PTR(err); - } - - return lrc; -} - -/** - * xe_lrc_destroy - Destroy the LRC - * @ref: reference to LRC - * - * Called when ref == 0, release resources held by the Logical Ring Context - * (LRC) and free the LRC memory. 
- */ -void xe_lrc_destroy(struct kref *ref) -{ - struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount); - - xe_lrc_finish(lrc); - kfree(lrc); -} - -void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail); - else - xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail); -} - -u32 xe_lrc_ring_tail(struct xe_lrc *lrc) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL) & TAIL_ADDR; - else - return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; -} - -void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head); - else - xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head); -} - -u32 xe_lrc_ring_head(struct xe_lrc *lrc) -{ - if (xe_lrc_has_indirect_ring_state(lrc)) - return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD) & HEAD_ADDR; - else - return xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD) & HEAD_ADDR; -} - -u32 xe_lrc_ring_space(struct xe_lrc *lrc) -{ - const u32 head = xe_lrc_ring_head(lrc); - const u32 tail = lrc->ring.tail; - const u32 size = lrc->ring.size; - - return ((head - tail - 1) & (size - 1)) + 1; -} - -static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring, - const void *data, size_t size) -{ - struct xe_device *xe = lrc_to_xe(lrc); - - iosys_map_incr(&ring, lrc->ring.tail); - xe_map_memcpy_to(xe, &ring, 0, data, size); - lrc->ring.tail = (lrc->ring.tail + size) & (lrc->ring.size - 1); -} - -void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size) -{ - struct xe_device *xe = lrc_to_xe(lrc); - struct iosys_map ring; - u32 rhs; - size_t aligned_size; - - xe_assert(xe, IS_ALIGNED(size, 4)); - aligned_size = ALIGN(size, 8); - - ring = __xe_lrc_ring_map(lrc); - - xe_assert(xe, lrc->ring.tail < lrc->ring.size); - rhs = lrc->ring.size - 
lrc->ring.tail; - if (size > rhs) { - __xe_lrc_write_ring(lrc, ring, data, rhs); - __xe_lrc_write_ring(lrc, ring, data + rhs, size - rhs); - } else { - __xe_lrc_write_ring(lrc, ring, data, size); - } - - if (aligned_size > size) { - u32 noop = MI_NOOP; - - __xe_lrc_write_ring(lrc, ring, &noop, sizeof(noop)); - } -} - -u64 xe_lrc_descriptor(struct xe_lrc *lrc) -{ - return lrc->desc | xe_lrc_ggtt_addr(lrc); -} - -u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_seqno_ggtt_addr(lrc); -} - -/** - * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence. - * - * Allocate but don't initialize an lrc seqno fence. - * - * Return: Pointer to the allocated fence or - * negative error pointer on error. - */ -struct dma_fence *xe_lrc_alloc_seqno_fence(void) -{ - return xe_hw_fence_alloc(); -} - -/** - * xe_lrc_free_seqno_fence() - Free an lrc seqno fence. - * @fence: Pointer to the fence to free. - * - * Frees an lrc seqno fence that hasn't yet been - * initialized. - */ -void xe_lrc_free_seqno_fence(struct dma_fence *fence) -{ - xe_hw_fence_free(fence); -} - -/** - * xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence. - * @lrc: Pointer to the lrc. - * @fence: Pointer to the fence to initialize. - * - * Initializes a pre-allocated lrc seqno fence. - * After initialization, the fence is subject to normal - * dma-fence refcounting. 
- */ -void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence) -{ - xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc)); -} - -s32 xe_lrc_seqno(struct xe_lrc *lrc) -{ - struct iosys_map map = __xe_lrc_seqno_map(lrc); - - return xe_map_read32(lrc_to_xe(lrc), &map); -} - -s32 xe_lrc_start_seqno(struct xe_lrc *lrc) -{ - struct iosys_map map = __xe_lrc_start_seqno_map(lrc); - - return xe_map_read32(lrc_to_xe(lrc), &map); -} - -u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_start_seqno_ggtt_addr(lrc); -} - -u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc) -{ - return __xe_lrc_parallel_ggtt_addr(lrc); -} - -struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc) -{ - return __xe_lrc_parallel_map(lrc); -} - -static int instr_dw(u32 cmd_header) -{ - /* GFXPIPE "SINGLE_DW" opcodes are a single dword */ - if ((cmd_header & (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE)) == - GFXPIPE_SINGLE_DW_CMD(0, 0)) - return 1; - - /* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */ - if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST) - return REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN, cmd_header) + 2; - - /* Most instructions have the # of dwords (minus 2) in 7:0 */ - return REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header) + 2; -} - -static int dump_mi_command(struct drm_printer *p, - struct xe_gt *gt, - u32 *dw, - int remaining_dw) -{ - u32 inst_header = *dw; - u32 numdw = instr_dw(inst_header); - u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header); - int num_noop; - - /* First check for commands that don't have/use a '# DW' field */ - switch (inst_header & MI_OPCODE) { - case MI_NOOP: - num_noop = 1; - while (num_noop < remaining_dw && - (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP) - num_noop++; - drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop); - return num_noop; - - case MI_TOPOLOGY_FILTER: - drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header); - return 1; - - case 
MI_BATCH_BUFFER_END: - drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header); - /* Return 'remaining_dw' to consume the rest of the LRC */ - return remaining_dw; - } - - /* - * Any remaining commands include a # of dwords. We should make sure - * it doesn't exceed the remaining size of the LRC. - */ - if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) - numdw = remaining_dw; - - switch (inst_header & MI_OPCODE) { - case MI_LOAD_REGISTER_IMM: - drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n", - inst_header, (numdw - 1) / 2); - for (int i = 1; i < numdw; i += 2) - drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]); - return numdw; - - case MI_LOAD_REGISTER_MEM & MI_OPCODE: - drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n", - inst_header, - dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "", - dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : ""); - if (numdw == 4) - drm_printf(p, " - %#6x = %#010llx\n", - dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2]))); - else - drm_printf(p, " - %*ph (%s)\n", - (int)sizeof(u32) * (numdw - 1), dw + 1, - numdw < 4 ? "truncated" : "malformed"); - return numdw; - - case MI_FORCE_WAKEUP: - drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header); - return numdw; - - default: - drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n", - inst_header, opcode, numdw); - return numdw; - } -} - -static int dump_gfxpipe_command(struct drm_printer *p, - struct xe_gt *gt, - u32 *dw, - int remaining_dw) -{ - u32 numdw = instr_dw(*dw); - u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw); - u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw); - u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw); - - /* - * Make sure we haven't mis-parsed a number of dwords that exceeds the - * remaining size of the LRC. 
- */ - if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) - numdw = remaining_dw; - - switch (*dw & GFXPIPE_MATCH_MASK) { -#define MATCH(cmd) \ - case cmd: \ - drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ - return numdw -#define MATCH3D(cmd) \ - case CMD_##cmd: \ - drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \ - return numdw - - MATCH(STATE_BASE_ADDRESS); - MATCH(STATE_SIP); - MATCH(GPGPU_CSR_BASE_ADDRESS); - MATCH(STATE_COMPUTE_MODE); - MATCH3D(3DSTATE_BTD); - MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS); - MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS); - - MATCH3D(3DSTATE_VF_STATISTICS); - - MATCH(PIPELINE_SELECT); - - MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST); - MATCH3D(3DSTATE_CLEAR_PARAMS); - MATCH3D(3DSTATE_DEPTH_BUFFER); - MATCH3D(3DSTATE_STENCIL_BUFFER); - MATCH3D(3DSTATE_HIER_DEPTH_BUFFER); - MATCH3D(3DSTATE_VERTEX_BUFFERS); - MATCH3D(3DSTATE_VERTEX_ELEMENTS); - MATCH3D(3DSTATE_INDEX_BUFFER); - MATCH3D(3DSTATE_VF); - MATCH3D(3DSTATE_MULTISAMPLE); - MATCH3D(3DSTATE_CC_STATE_POINTERS); - MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS); - MATCH3D(3DSTATE_VS); - MATCH3D(3DSTATE_GS); - MATCH3D(3DSTATE_CLIP); - MATCH3D(3DSTATE_SF); - MATCH3D(3DSTATE_WM); - MATCH3D(3DSTATE_CONSTANT_VS); - MATCH3D(3DSTATE_CONSTANT_GS); - MATCH3D(3DSTATE_CONSTANT_PS); - MATCH3D(3DSTATE_SAMPLE_MASK); - MATCH3D(3DSTATE_CONSTANT_HS); - MATCH3D(3DSTATE_CONSTANT_DS); - MATCH3D(3DSTATE_HS); - MATCH3D(3DSTATE_TE); - MATCH3D(3DSTATE_DS); - MATCH3D(3DSTATE_STREAMOUT); - MATCH3D(3DSTATE_SBE); - MATCH3D(3DSTATE_PS); - MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); - MATCH3D(3DSTATE_CPS_POINTERS); - MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC); - MATCH3D(3DSTATE_BLEND_STATE_POINTERS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS); - MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS); - 
MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS); - MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS); - MATCH3D(3DSTATE_VF_INSTANCING); - MATCH3D(3DSTATE_VF_SGVS); - MATCH3D(3DSTATE_VF_TOPOLOGY); - MATCH3D(3DSTATE_WM_CHROMAKEY); - MATCH3D(3DSTATE_PS_BLEND); - MATCH3D(3DSTATE_WM_DEPTH_STENCIL); - MATCH3D(3DSTATE_PS_EXTRA); - MATCH3D(3DSTATE_RASTER); - MATCH3D(3DSTATE_SBE_SWIZ); - MATCH3D(3DSTATE_WM_HZ_OP); - MATCH3D(3DSTATE_VF_COMPONENT_PACKING); - MATCH3D(3DSTATE_VF_SGVS_2); - MATCH3D(3DSTATE_VFG); - MATCH3D(3DSTATE_URB_ALLOC_VS); - MATCH3D(3DSTATE_URB_ALLOC_HS); - MATCH3D(3DSTATE_URB_ALLOC_DS); - MATCH3D(3DSTATE_URB_ALLOC_GS); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_0); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_1); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_2); - MATCH3D(3DSTATE_SO_BUFFER_INDEX_3); - MATCH3D(3DSTATE_PRIMITIVE_REPLICATION); - MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO); - MATCH3D(3DSTATE_AMFS); - MATCH3D(3DSTATE_DEPTH_BOUNDS); - MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS); - MATCH3D(3DSTATE_CONSTANT_TS_POINTER); - MATCH3D(3DSTATE_MESH_CONTROL); - MATCH3D(3DSTATE_MESH_DISTRIB); - MATCH3D(3DSTATE_TASK_REDISTRIB); - MATCH3D(3DSTATE_MESH_SHADER); - MATCH3D(3DSTATE_MESH_SHADER_DATA); - MATCH3D(3DSTATE_TASK_CONTROL); - MATCH3D(3DSTATE_TASK_SHADER); - MATCH3D(3DSTATE_TASK_SHADER_DATA); - MATCH3D(3DSTATE_URB_ALLOC_MESH); - MATCH3D(3DSTATE_URB_ALLOC_TASK); - MATCH3D(3DSTATE_CLIP_MESH); - MATCH3D(3DSTATE_SBE_MESH); - MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER); - - MATCH3D(3DSTATE_DRAWING_RECTANGLE); - MATCH3D(3DSTATE_CHROMA_KEY); - MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET); - MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN); - MATCH3D(3DSTATE_LINE_STIPPLE); - MATCH3D(3DSTATE_AA_LINE_PARAMETERS); - MATCH3D(3DSTATE_MONOFILTER_SIZE); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS); - MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS); - 
MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS); - MATCH3D(3DSTATE_SO_DECL_LIST); - MATCH3D(3DSTATE_SO_BUFFER); - MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC); - MATCH3D(3DSTATE_SAMPLE_PATTERN); - MATCH3D(3DSTATE_3D_MODE); - MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE); - MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS); - MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO); - - default: - drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n", - *dw, pipeline, opcode, subopcode, numdw); - return numdw; - } -} - -static int dump_gfx_state_command(struct drm_printer *p, - struct xe_gt *gt, - u32 *dw, - int remaining_dw) -{ - u32 numdw = instr_dw(*dw); - u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw); - - /* - * Make sure we haven't mis-parsed a number of dwords that exceeds the - * remaining size of the LRC. - */ - if (xe_gt_WARN_ON(gt, numdw > remaining_dw)) - numdw = remaining_dw; - - switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) { - MATCH(STATE_WRITE_INLINE); - - default: - drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n", - *dw, opcode, numdw); - return numdw; - } -} - -void xe_lrc_dump_default(struct drm_printer *p, - struct xe_gt *gt, - enum xe_engine_class hwe_class) -{ - u32 *dw; - int remaining_dw, num_dw; - - if (!gt->default_lrc[hwe_class]) { - drm_printf(p, "No default LRC for class %d\n", hwe_class); - return; - } - - /* - * Skip the beginning of the LRC since it contains the per-process - * hardware status page. 
- */ - dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE; - remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4; - - while (remaining_dw > 0) { - if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) { - num_dw = dump_mi_command(p, gt, dw, remaining_dw); - } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) { - num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw); - } else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) { - num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw); - } else { - num_dw = min(instr_dw(*dw), remaining_dw); - drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n", - *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw), - num_dw); - } - - dw += num_dw; - remaining_dw -= num_dw; - } -} - -struct instr_state { - u32 instr; - u16 num_dw; -}; - -static const struct instr_state xe_hpg_svg_state[] = { - { .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 }, - { .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 }, - { .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 }, - { .instr = CMD_3DSTATE_VS, .num_dw = 9 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 }, - { .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 }, - { .instr = CMD_3DSTATE_CLIP, .num_dw = 4 }, - { .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 }, - { .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 }, - { .instr = 
CMD_3DSTATE_SF, .num_dw = 4 }, - { .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 }, - { .instr = CMD_3DSTATE_RASTER, .num_dw = 5 }, - { .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 }, - { .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 }, - { .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 }, - { .instr = CMD_3DSTATE_HS, .num_dw = 9 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 }, - { .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 }, - { .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 }, - { .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 }, - { .instr = CMD_3DSTATE_TE, .num_dw = 5 }, - { .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 }, - { .instr = CMD_3DSTATE_DS, .num_dw = 11 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_GS, .num_dw = 10 }, - { .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 }, - { .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 }, - { .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 }, - { .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 }, - { .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 }, - { .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 }, - { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, -}; - -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) -{ - struct xe_gt *gt = q->hwe->gt; - struct xe_device *xe = gt_to_xe(gt); - const struct instr_state *state_table = NULL; - int state_table_size = 0; - - /* - * Wa_14019789679 - * - * If the driver doesn't explicitly emit the 
SVG instructions while - * setting up the default LRC, the context switch will write 0's - * (noops) into the LRC memory rather than the expected instruction - * headers. Application contexts start out as a copy of the default - * LRC, and if they also do not emit specific settings for some SVG - * state, then on context restore they'll unintentionally inherit - * whatever state setting the previous context had programmed into the - * hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will - * prevent the hardware from resetting that state back to any specific - * value). - * - * The official workaround only requires emitting 3DSTATE_MESH_CONTROL - * since that's a specific state setting that can easily cause GPU - * hangs if unintentionally inherited. However to be safe we'll - * continue to emit all of the SVG state since it's best not to leak - * any of the state between contexts, even if that leakage is harmless. - */ - if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) { - state_table = xe_hpg_svg_state; - state_table_size = ARRAY_SIZE(xe_hpg_svg_state); - } - - if (!state_table) { - xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", - GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); - return; - } - - for (int i = 0; i < state_table_size; i++) { - u32 instr = state_table[i].instr; - u16 num_dw = state_table[i].num_dw; - bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW); - - xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE); - xe_gt_assert(gt, num_dw != 0); - xe_gt_assert(gt, is_single_dw ^ (num_dw > 1)); - - /* - * Xe2's SVG context is the same as the one on DG2 / MTL - * except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has - * been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined). - * Just make the replacement here rather than defining a - * whole separate table for the single trivial change. 
- */ - if (GRAPHICS_VER(xe) >= 20 && - instr == CMD_3DSTATE_DRAWING_RECTANGLE) - instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; - - bb->cs[bb->len] = instr; - if (!is_single_dw) - bb->cs[bb->len] |= (num_dw - 2); - - bb->len += num_dw; - } -} - -struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) -{ - struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT); - - if (!snapshot) - return NULL; - -<<<<<<< - if (lrc->bo && lrc->bo->vm) -======= - if (lrc->bo->vm) ->>>>>>> - xe_vm_get(lrc->bo->vm); - - snapshot->context_desc = xe_lrc_ggtt_addr(lrc); - snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); - snapshot->head = xe_lrc_ring_head(lrc); - snapshot->tail.internal = lrc->ring.tail; - snapshot->tail.memory = xe_lrc_ring_tail(lrc); - snapshot->start_seqno = xe_lrc_start_seqno(lrc); - snapshot->seqno = xe_lrc_seqno(lrc); - snapshot->lrc_bo = xe_bo_get(lrc->bo); - snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset; - snapshot->lrc_snapshot = NULL; - snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); - snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); - return snapshot; -} - -void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) -{ - struct xe_bo *bo; - struct xe_vm *vm; - struct iosys_map src; - - if (!snapshot) - return; - - bo = snapshot->lrc_bo; - vm = bo->vm; - snapshot->lrc_bo = NULL; - - snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); - if (!snapshot->lrc_snapshot) - goto put_bo; - - xe_bo_lock(bo, false); - if (!ttm_bo_vmap(&bo->ttm, &src)) { - xe_map_memcpy_from(xe_bo_device(bo), - snapshot->lrc_snapshot, &src, snapshot->lrc_offset, - snapshot->lrc_size); - ttm_bo_vunmap(&bo->ttm, &src); - } else { - kvfree(snapshot->lrc_snapshot); - snapshot->lrc_snapshot = NULL; - } - xe_bo_unlock(bo); -put_bo: - xe_bo_put(bo); - if (vm) - xe_vm_put(vm); -} - -void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, 
struct drm_printer *p) -{ - unsigned long i; - - if (!snapshot) - return; - - drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); - drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", - snapshot->indirect_context_desc); - drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); - drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", - snapshot->tail.internal, snapshot->tail.memory); - drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); - drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); - drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); - drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp); - - if (!snapshot->lrc_snapshot) - return; - - drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE); - drm_puts(p, "\t[HWSP].data: "); - for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) { - u32 *val = snapshot->lrc_snapshot + i; - char dumped[ASCII85_BUFSZ]; - - drm_puts(p, ascii85_encode(*val, dumped)); - } - - drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE); - drm_puts(p, "\t[HWCTX].data: "); - for (; i < snapshot->lrc_size; i += sizeof(u32)) { - u32 *val = snapshot->lrc_snapshot + i; - char dumped[ASCII85_BUFSZ]; - - drm_puts(p, ascii85_encode(*val, dumped)); - } - drm_puts(p, "\n"); -} - -void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) -{ - if (!snapshot) - return; - - kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) { - struct xe_vm *vm; - - vm = snapshot->lrc_bo->vm; - xe_bo_put(snapshot->lrc_bo); - if (vm) - xe_vm_put(vm); - } - kfree(snapshot); -} - -/** - * xe_lrc_update_timestamp() - Update ctx timestamp - * @lrc: Pointer to the lrc. - * @old_ts: Old timestamp value - * - * Populate @old_ts current saved ctx timestamp, read new ctx timestamp and - * update saved value. 
- * - * Returns: New ctx timestamp value - */ -u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) -{ - *old_ts = lrc->ctx_timestamp; - - lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); - - return lrc->ctx_timestamp; -} diff --git a/rr-cache/641dfaef393e85daf275c6725aed0c69ef5046ad/preimage b/rr-cache/641dfaef393e85daf275c6725aed0c69ef5046ad/preimage new file mode 100644 index 000000000000..be64058af509 --- /dev/null +++ b/rr-cache/641dfaef393e85daf275c6725aed0c69ef5046ad/preimage @@ -0,0 +1,1089 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 Broadcom + */ + +/** + * DOC: VC4 HVS module. + * + * The Hardware Video Scaler (HVS) is the piece of hardware that does + * translation, scaling, colorspace conversion, and compositing of + * pixels stored in framebuffers into a FIFO of pixels going out to + * the Pixel Valve (CRTC). It operates at the system clock rate (the + * system audio clock gate, specifically), which is much higher than + * the pixel clock rate. + * + * There is a single global HVS, with multiple output FIFOs that can + * be consumed by the PVs. This file just manages the resources for + * the HVS, while the vc4_crtc.c code actually drives HVS setup for + * each CRTC. 
+ */ + +#include <linux/bitfield.h> +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/platform_device.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_drv.h> +#include <drm/drm_vblank.h> + +#include <soc/bcm2835/raspberrypi-firmware.h> + +#include "vc4_drv.h" +#include "vc4_regs.h" + +static const struct debugfs_reg32 hvs_regs[] = { + VC4_REG32(SCALER_DISPCTRL), + VC4_REG32(SCALER_DISPSTAT), + VC4_REG32(SCALER_DISPID), + VC4_REG32(SCALER_DISPECTRL), + VC4_REG32(SCALER_DISPPROF), + VC4_REG32(SCALER_DISPDITHER), + VC4_REG32(SCALER_DISPEOLN), + VC4_REG32(SCALER_DISPLIST0), + VC4_REG32(SCALER_DISPLIST1), + VC4_REG32(SCALER_DISPLIST2), + VC4_REG32(SCALER_DISPLSTAT), + VC4_REG32(SCALER_DISPLACT0), + VC4_REG32(SCALER_DISPLACT1), + VC4_REG32(SCALER_DISPLACT2), + VC4_REG32(SCALER_DISPCTRL0), + VC4_REG32(SCALER_DISPBKGND0), + VC4_REG32(SCALER_DISPSTAT0), + VC4_REG32(SCALER_DISPBASE0), + VC4_REG32(SCALER_DISPCTRL1), + VC4_REG32(SCALER_DISPBKGND1), + VC4_REG32(SCALER_DISPSTAT1), + VC4_REG32(SCALER_DISPBASE1), + VC4_REG32(SCALER_DISPCTRL2), + VC4_REG32(SCALER_DISPBKGND2), + VC4_REG32(SCALER_DISPSTAT2), + VC4_REG32(SCALER_DISPBASE2), + VC4_REG32(SCALER_DISPALPHA2), + VC4_REG32(SCALER_OLEDOFFS), + VC4_REG32(SCALER_OLEDCOEF0), + VC4_REG32(SCALER_OLEDCOEF1), + VC4_REG32(SCALER_OLEDCOEF2), +}; + +void vc4_hvs_dump_state(struct vc4_hvs *hvs) +{ + struct drm_device *drm = &hvs->vc4->base; + struct drm_printer p = drm_info_printer(&hvs->pdev->dev); + int idx, i; + + if (!drm_dev_enter(drm, &idx)) + return; + + drm_print_regset32(&p, &hvs->regset); + + DRM_INFO("HVS ctx:\n"); + for (i = 0; i < 64; i += 4) { + DRM_INFO("0x%08x (%s): 0x%08x 0x%08x 0x%08x 0x%08x\n", + i * 4, i < HVS_BOOTLOADER_DLIST_END ? 
"B" : "D", + readl((u32 __iomem *)hvs->dlist + i + 0), + readl((u32 __iomem *)hvs->dlist + i + 1), + readl((u32 __iomem *)hvs->dlist + i + 2), + readl((u32 __iomem *)hvs->dlist + i + 3)); + } + + drm_dev_exit(idx); +} + +static int vc4_hvs_debugfs_underrun(struct seq_file *m, void *data) +{ + struct drm_debugfs_entry *entry = m->private; + struct drm_device *dev = entry->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_printer p = drm_seq_file_printer(m); + + drm_printf(&p, "%d\n", atomic_read(&vc4->underrun)); + + return 0; +} + +static int vc4_hvs_debugfs_dlist(struct seq_file *m, void *data) +{ + struct drm_debugfs_entry *entry = m->private; + struct drm_device *dev = entry->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hvs *hvs = vc4->hvs; + struct drm_printer p = drm_seq_file_printer(m); + unsigned int next_entry_start = 0; + unsigned int i, j; + u32 dlist_word, dispstat; + + for (i = 0; i < SCALER_CHANNELS_COUNT; i++) { + dispstat = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTATX(i)), + SCALER_DISPSTATX_MODE); + if (dispstat == SCALER_DISPSTATX_MODE_DISABLED || + dispstat == SCALER_DISPSTATX_MODE_EOF) { + drm_printf(&p, "HVS chan %u disabled\n", i); + continue; + } + + drm_printf(&p, "HVS chan %u:\n", i); + + for (j = HVS_READ(SCALER_DISPLISTX(i)); j < 256; j++) { + dlist_word = readl((u32 __iomem *)vc4->hvs->dlist + j); + drm_printf(&p, "dlist: %02d: 0x%08x\n", j, + dlist_word); + if (!next_entry_start || + next_entry_start == j) { + if (dlist_word & SCALER_CTL0_END) + break; + next_entry_start = j + + VC4_GET_FIELD(dlist_word, + SCALER_CTL0_SIZE); + } + } + } + + return 0; +} + +/* The filter kernel is composed of dwords each containing 3 9-bit + * signed integers packed next to each other. 
+ */ +#define VC4_INT_TO_COEFF(coeff) (coeff & 0x1ff) +#define VC4_PPF_FILTER_WORD(c0, c1, c2) \ + ((((c0) & 0x1ff) << 0) | \ + (((c1) & 0x1ff) << 9) | \ + (((c2) & 0x1ff) << 18)) + +/* The whole filter kernel is arranged as the coefficients 0-16 going + * up, then a pad, then 17-31 going down and reversed within the + * dwords. This means that a linear phase kernel (where it's + * symmetrical at the boundary between 15 and 16) has the last 5 + * dwords matching the first 5, but reversed. + */ +#define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8, \ + c9, c10, c11, c12, c13, c14, c15) \ + {VC4_PPF_FILTER_WORD(c0, c1, c2), \ + VC4_PPF_FILTER_WORD(c3, c4, c5), \ + VC4_PPF_FILTER_WORD(c6, c7, c8), \ + VC4_PPF_FILTER_WORD(c9, c10, c11), \ + VC4_PPF_FILTER_WORD(c12, c13, c14), \ + VC4_PPF_FILTER_WORD(c15, c15, 0)} + +#define VC4_LINEAR_PHASE_KERNEL_DWORDS 6 +#define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1) + +/* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali. + * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf + */ +static const u32 mitchell_netravali_1_3_1_3_kernel[] = + VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18, + 50, 82, 119, 155, 187, 213, 227); + +static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs, + struct drm_mm_node *space, + const u32 *kernel) +{ + int ret, i; + u32 __iomem *dst_kernel; + + /* + * NOTE: We don't need a call to drm_dev_enter()/drm_dev_exit() + * here since that function is only called from vc4_hvs_bind(). 
+ */ + + ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS); + if (ret) { + drm_err(&hvs->vc4->base, "Failed to allocate space for filter kernel: %d\n", + ret); + return ret; + } + + dst_kernel = hvs->dlist + space->start; + + for (i = 0; i < VC4_KERNEL_DWORDS; i++) { + if (i < VC4_LINEAR_PHASE_KERNEL_DWORDS) + writel(kernel[i], &dst_kernel[i]); + else { + writel(kernel[VC4_KERNEL_DWORDS - i - 1], + &dst_kernel[i]); + } + } + + return 0; +} + +static void vc4_hvs_lut_load(struct vc4_hvs *hvs, + struct vc4_crtc *vc4_crtc) +{ + struct drm_device *drm = &hvs->vc4->base; + struct drm_crtc *crtc = &vc4_crtc->base; + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); + int idx; + u32 i; + + if (!drm_dev_enter(drm, &idx)) + return; + +<<<<<<< +======= + if (hvs->vc4->gen == VC4_GEN_4) + goto exit; + +>>>>>>> + /* The LUT memory is laid out with each HVS channel in order, + * each of which takes 256 writes for R, 256 for G, then 256 + * for B. + */ + HVS_WRITE(SCALER_GAMADDR, + SCALER_GAMADDR_AUTOINC | + (vc4_state->assigned_channel * 3 * crtc->gamma_size)); + + for (i = 0; i < crtc->gamma_size; i++) + HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_r[i]); + for (i = 0; i < crtc->gamma_size; i++) + HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_g[i]); + for (i = 0; i < crtc->gamma_size; i++) + HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_b[i]); + +exit: + drm_dev_exit(idx); +} + +static void vc4_hvs_update_gamma_lut(struct vc4_hvs *hvs, + struct vc4_crtc *vc4_crtc) +{ + struct drm_crtc_state *crtc_state = vc4_crtc->base.state; + struct drm_color_lut *lut = crtc_state->gamma_lut->data; + u32 length = drm_color_lut_size(crtc_state->gamma_lut); + u32 i; + + for (i = 0; i < length; i++) { + vc4_crtc->lut_r[i] = drm_color_lut_extract(lut[i].red, 8); + vc4_crtc->lut_g[i] = drm_color_lut_extract(lut[i].green, 8); + vc4_crtc->lut_b[i] = drm_color_lut_extract(lut[i].blue, 8); + } + + vc4_hvs_lut_load(hvs, vc4_crtc); +} + +u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs 
*hvs, unsigned int fifo) +{ + struct drm_device *drm = &hvs->vc4->base; + u8 field = 0; + int idx; + + if (!drm_dev_enter(drm, &idx)) + return 0; + + switch (fifo) { + case 0: + field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1), + SCALER_DISPSTAT1_FRCNT0); + break; + case 1: + field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1), + SCALER_DISPSTAT1_FRCNT1); + break; + case 2: + field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2), + SCALER_DISPSTAT2_FRCNT2); + break; + } + + drm_dev_exit(idx); + return field; +} + +int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output) +{ + struct vc4_dev *vc4 = hvs->vc4; + u32 reg; + int ret; + + if (!vc4->is_vc5) + return output; + + /* + * NOTE: We should probably use drm_dev_enter()/drm_dev_exit() + * here, but this function is only used during the DRM device + * initialization, so we should be fine. + */ + + switch (output) { + case 0: + return 0; + + case 1: + return 1; + + case 2: + reg = HVS_READ(SCALER_DISPECTRL); + ret = FIELD_GET(SCALER_DISPECTRL_DSP2_MUX_MASK, reg); + if (ret == 0) + return 2; + + return 0; + + case 3: + reg = HVS_READ(SCALER_DISPCTRL); + ret = FIELD_GET(SCALER_DISPCTRL_DSP3_MUX_MASK, reg); + if (ret == 3) + return -EPIPE; + + return ret; + + case 4: + reg = HVS_READ(SCALER_DISPEOLN); + ret = FIELD_GET(SCALER_DISPEOLN_DSP4_MUX_MASK, reg); + if (ret == 3) + return -EPIPE; + + return ret; + + case 5: + reg = HVS_READ(SCALER_DISPDITHER); + ret = FIELD_GET(SCALER_DISPDITHER_DSP5_MUX_MASK, reg); + if (ret == 3) + return -EPIPE; + + return ret; + + default: + return -EPIPE; + } +} + +static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc, + struct drm_display_mode *mode, bool oneshot) +{ + struct vc4_dev *vc4 = hvs->vc4; + struct drm_device *drm = &vc4->base; + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + struct vc4_crtc_state *vc4_crtc_state = to_vc4_crtc_state(crtc->state); + unsigned int chan = vc4_crtc_state->assigned_channel; + bool interlace = mode->flags & 
DRM_MODE_FLAG_INTERLACE; + u32 dispbkgndx; + u32 dispctrl; + int idx; + + if (!drm_dev_enter(drm, &idx)) + return -ENODEV; + + HVS_WRITE(SCALER_DISPCTRLX(chan), 0); + HVS_WRITE(SCALER_DISPCTRLX(chan), SCALER_DISPCTRLX_RESET); + HVS_WRITE(SCALER_DISPCTRLX(chan), 0); + + /* Turn on the scaler, which will wait for vstart to start + * compositing. + * When feeding the transposer, we should operate in oneshot + * mode. + */ + dispctrl = SCALER_DISPCTRLX_ENABLE; + dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(chan)); + + if (!vc4->is_vc5) { + dispctrl |= VC4_SET_FIELD(mode->hdisplay, + SCALER_DISPCTRLX_WIDTH) | + VC4_SET_FIELD(mode->vdisplay, + SCALER_DISPCTRLX_HEIGHT) | + (oneshot ? SCALER_DISPCTRLX_ONESHOT : 0); + dispbkgndx |= SCALER_DISPBKGND_AUTOHS; + } else { + dispctrl |= VC4_SET_FIELD(mode->hdisplay, + SCALER5_DISPCTRLX_WIDTH) | + VC4_SET_FIELD(mode->vdisplay, + SCALER5_DISPCTRLX_HEIGHT) | + (oneshot ? SCALER5_DISPCTRLX_ONESHOT : 0); + dispbkgndx &= ~SCALER5_DISPBKGND_BCK2BCK; + } + + HVS_WRITE(SCALER_DISPCTRLX(chan), dispctrl); + + dispbkgndx &= ~SCALER_DISPBKGND_GAMMA; + dispbkgndx &= ~SCALER_DISPBKGND_INTERLACE; + + HVS_WRITE(SCALER_DISPBKGNDX(chan), dispbkgndx | + ((!vc4->is_vc5) ? SCALER_DISPBKGND_GAMMA : 0) | + (interlace ? SCALER_DISPBKGND_INTERLACE : 0)); + + /* Reload the LUT, since the SRAMs would have been disabled if + * all CRTCs had SCALER_DISPBKGND_GAMMA unset at once. 
+ */ + vc4_hvs_lut_load(hvs, vc4_crtc); + + drm_dev_exit(idx); + + return 0; +} + +void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan) +{ + struct drm_device *drm = &hvs->vc4->base; + int idx; + + if (!drm_dev_enter(drm, &idx)) + return; + + if (HVS_READ(SCALER_DISPCTRLX(chan)) & SCALER_DISPCTRLX_ENABLE) + goto out; + + HVS_WRITE(SCALER_DISPCTRLX(chan), + HVS_READ(SCALER_DISPCTRLX(chan)) | SCALER_DISPCTRLX_RESET); + HVS_WRITE(SCALER_DISPCTRLX(chan), + HVS_READ(SCALER_DISPCTRLX(chan)) & ~SCALER_DISPCTRLX_ENABLE); + + /* Once we leave, the scaler should be disabled and its fifo empty. */ + WARN_ON_ONCE(HVS_READ(SCALER_DISPCTRLX(chan)) & SCALER_DISPCTRLX_RESET); + + WARN_ON_ONCE(VC4_GET_FIELD(HVS_READ(SCALER_DISPSTATX(chan)), + SCALER_DISPSTATX_MODE) != + SCALER_DISPSTATX_MODE_DISABLED); + + WARN_ON_ONCE((HVS_READ(SCALER_DISPSTATX(chan)) & + (SCALER_DISPSTATX_FULL | SCALER_DISPSTATX_EMPTY)) != + SCALER_DISPSTATX_EMPTY); + +out: + drm_dev_exit(idx); +} + +int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) +{ + struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state); + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_plane *plane; + unsigned long flags; + const struct drm_plane_state *plane_state; + u32 dlist_count = 0; + int ret; + + /* The pixelvalve can only feed one encoder (and encoders are + * 1:1 with connectors.) + */ + if (hweight32(crtc_state->connector_mask) > 1) + return -EINVAL; + + drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) + dlist_count += vc4_plane_dlist_size(plane_state); + + dlist_count++; /* Account for SCALER_CTL0_END. 
*/ + + spin_lock_irqsave(&vc4->hvs->mm_lock, flags); + ret = drm_mm_insert_node(&vc4->hvs->dlist_mm, &vc4_state->mm, + dlist_count); + spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags); + if (ret) + return ret; + + return 0; +} + +static void vc4_hvs_install_dlist(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hvs *hvs = vc4->hvs; + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); + int idx; + + if (!drm_dev_enter(dev, &idx)) + return; + + HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel), + vc4_state->mm.start); + + drm_dev_exit(idx); +} + +static void vc4_hvs_update_dlist(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); + unsigned long flags; + + if (crtc->state->event) { + crtc->state->event->pipe = drm_crtc_index(crtc); + + WARN_ON(drm_crtc_vblank_get(crtc) != 0); + + spin_lock_irqsave(&dev->event_lock, flags); + + if (!vc4_crtc->feeds_txp || vc4_state->txp_armed) { + vc4_crtc->event = crtc->state->event; + crtc->state->event = NULL; + } + + spin_unlock_irqrestore(&dev->event_lock, flags); + } + + spin_lock_irqsave(&vc4_crtc->irq_lock, flags); + vc4_crtc->current_dlist = vc4_state->mm.start; + spin_unlock_irqrestore(&vc4_crtc->irq_lock, flags); +} + +void vc4_hvs_atomic_begin(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); + unsigned long flags; + + spin_lock_irqsave(&vc4_crtc->irq_lock, flags); + vc4_crtc->current_hvs_channel = vc4_state->assigned_channel; + spin_unlock_irqrestore(&vc4_crtc->irq_lock, flags); +} + +void vc4_hvs_atomic_enable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_display_mode *mode 
= &crtc->state->adjusted_mode; + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + bool oneshot = vc4_crtc->feeds_txp; + + vc4_hvs_install_dlist(crtc); + vc4_hvs_update_dlist(crtc); + vc4_hvs_init_channel(vc4->hvs, crtc, mode, oneshot); +} + +void vc4_hvs_atomic_disable(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_crtc_state *old_state = drm_atomic_get_old_crtc_state(state, crtc); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(old_state); + unsigned int chan = vc4_state->assigned_channel; + + vc4_hvs_stop_channel(vc4->hvs, chan); +} + +void vc4_hvs_atomic_flush(struct drm_crtc *crtc, + struct drm_atomic_state *state) +{ + struct drm_crtc_state *old_state = drm_atomic_get_old_crtc_state(state, + crtc); + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hvs *hvs = vc4->hvs; + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); + unsigned int channel = vc4_state->assigned_channel; + struct drm_plane *plane; + struct vc4_plane_state *vc4_plane_state; + bool debug_dump_regs = false; + bool enable_bg_fill = false; + u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start; + u32 __iomem *dlist_next = dlist_start; + unsigned int zpos = 0; + bool found = false; + int idx; + + if (!drm_dev_enter(dev, &idx)) { + vc4_crtc_send_vblank(crtc); + return; + } + + if (vc4_state->assigned_channel == VC4_HVS_CHANNEL_DISABLED) + return; + + if (debug_dump_regs) { + DRM_INFO("CRTC %d HVS before:\n", drm_crtc_index(crtc)); + vc4_hvs_dump_state(hvs); + } + + /* Copy all the active planes' dlist contents to the hardware dlist. */ + do { + found = false; + + drm_atomic_crtc_for_each_plane(plane, crtc) { + if (plane->state->normalized_zpos != zpos) + continue; + + /* Is this the first active plane? 
*/ + if (dlist_next == dlist_start) { + /* We need to enable background fill when a plane + * could be alpha blending from the background, i.e. + * where no other plane is underneath. It suffices to + * consider the first active plane here since we set + * needs_bg_fill such that either the first plane + * already needs it or all planes on top blend from + * the first or a lower plane. + */ + vc4_plane_state = to_vc4_plane_state(plane->state); + enable_bg_fill = vc4_plane_state->needs_bg_fill; + } + + dlist_next += vc4_plane_write_dlist(plane, dlist_next); + + found = true; + } + + zpos++; + } while (found); + + writel(SCALER_CTL0_END, dlist_next); + dlist_next++; + + WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size); + + if (enable_bg_fill) + /* This sets a black background color fill, as is the case + * with other DRM drivers. + */ + HVS_WRITE(SCALER_DISPBKGNDX(channel), + HVS_READ(SCALER_DISPBKGNDX(channel)) | + SCALER_DISPBKGND_FILL); + + /* Only update DISPLIST if the CRTC was already running and is not + * being disabled. + * vc4_crtc_enable() takes care of updating the dlist just after + * re-enabling VBLANK interrupts and before enabling the engine. + * If the CRTC is being disabled, there's no point in updating this + * information. + */ + if (crtc->state->active && old_state->active) { + vc4_hvs_install_dlist(crtc); + vc4_hvs_update_dlist(crtc); + } + + if (crtc->state->color_mgmt_changed) { + u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(channel)); + + if (crtc->state->gamma_lut) { + vc4_hvs_update_gamma_lut(hvs, vc4_crtc); + dispbkgndx |= SCALER_DISPBKGND_GAMMA; + } else { + /* Unsetting DISPBKGND_GAMMA skips the gamma lut step + * in hardware, which is the same as a linear lut that + * DRM expects us to use in absence of a user lut. 
+ */ + dispbkgndx &= ~SCALER_DISPBKGND_GAMMA; + } + HVS_WRITE(SCALER_DISPBKGNDX(channel), dispbkgndx); + } + + if (debug_dump_regs) { + DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); + vc4_hvs_dump_state(hvs); + } + + drm_dev_exit(idx); +} + +void vc4_hvs_mask_underrun(struct vc4_hvs *hvs, int channel) +{ + struct drm_device *drm = &hvs->vc4->base; + u32 dispctrl; + int idx; + + if (!drm_dev_enter(drm, &idx)) + return; + + dispctrl = HVS_READ(SCALER_DISPCTRL); + dispctrl &= ~(hvs->vc4->is_vc5 ? SCALER5_DISPCTRL_DSPEISLUR(channel) : + SCALER_DISPCTRL_DSPEISLUR(channel)); + + HVS_WRITE(SCALER_DISPCTRL, dispctrl); + + drm_dev_exit(idx); +} + +void vc4_hvs_unmask_underrun(struct vc4_hvs *hvs, int channel) +{ + struct drm_device *drm = &hvs->vc4->base; + u32 dispctrl; + int idx; + + if (!drm_dev_enter(drm, &idx)) + return; + + dispctrl = HVS_READ(SCALER_DISPCTRL); + dispctrl |= (hvs->vc4->is_vc5 ? SCALER5_DISPCTRL_DSPEISLUR(channel) : + SCALER_DISPCTRL_DSPEISLUR(channel)); + + HVS_WRITE(SCALER_DISPSTAT, + SCALER_DISPSTAT_EUFLOW(channel)); + HVS_WRITE(SCALER_DISPCTRL, dispctrl); + + drm_dev_exit(idx); +} + +static void vc4_hvs_report_underrun(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + atomic_inc(&vc4->underrun); + DRM_DEV_ERROR(dev->dev, "HVS underrun\n"); +} + +static irqreturn_t vc4_hvs_irq_handler(int irq, void *data) +{ + struct drm_device *dev = data; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hvs *hvs = vc4->hvs; + irqreturn_t irqret = IRQ_NONE; + int channel; + u32 control; + u32 status; + u32 dspeislur; + + /* + * NOTE: We don't need to protect the register access using + * drm_dev_enter() there because the interrupt handler lifetime + * is tied to the device itself, and not to the DRM device. + * + * So when the device will be gone, one of the first thing we + * will be doing will be to unregister the interrupt handler, + * and then unregister the DRM device. 
drm_dev_enter() would + * thus always succeed if we are here. + */ + + status = HVS_READ(SCALER_DISPSTAT); + control = HVS_READ(SCALER_DISPCTRL); + + for (channel = 0; channel < SCALER_CHANNELS_COUNT; channel++) { + dspeislur = vc4->is_vc5 ? SCALER5_DISPCTRL_DSPEISLUR(channel) : + SCALER_DISPCTRL_DSPEISLUR(channel); + /* Interrupt masking is not always honored, so check it here. */ + if (status & SCALER_DISPSTAT_EUFLOW(channel) && + control & dspeislur) { + vc4_hvs_mask_underrun(hvs, channel); + vc4_hvs_report_underrun(dev); + + irqret = IRQ_HANDLED; + } + } + + /* Clear every per-channel interrupt flag. */ + HVS_WRITE(SCALER_DISPSTAT, SCALER_DISPSTAT_IRQMASK(0) | + SCALER_DISPSTAT_IRQMASK(1) | + SCALER_DISPSTAT_IRQMASK(2)); + + return irqret; +} + +int vc4_hvs_debugfs_init(struct drm_minor *minor) +{ + struct drm_device *drm = minor->dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_hvs *hvs = vc4->hvs; + + if (!vc4->hvs) + return -ENODEV; + + if (!vc4->is_vc5) + debugfs_create_bool("hvs_load_tracker", S_IRUGO | S_IWUSR, + minor->debugfs_root, + &vc4->load_tracker_enabled); + + drm_debugfs_add_file(drm, "hvs_dlists", vc4_hvs_debugfs_dlist, NULL); + + drm_debugfs_add_file(drm, "hvs_underrun", vc4_hvs_debugfs_underrun, NULL); + + vc4_debugfs_add_regset32(drm, "hvs_regs", &hvs->regset); + + return 0; +} + +struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4, struct platform_device *pdev) +{ + struct drm_device *drm = &vc4->base; + struct vc4_hvs *hvs; + + hvs = drmm_kzalloc(drm, sizeof(*hvs), GFP_KERNEL); + if (!hvs) + return ERR_PTR(-ENOMEM); + + hvs->vc4 = vc4; + hvs->pdev = pdev; + + spin_lock_init(&hvs->mm_lock); + + /* Set up the HVS display list memory manager. We never + * overwrite the setup from the bootloader (just 128b out of + * our 16K), since we don't want to scramble the screen when + * transitioning from the firmware's boot setup to runtime. 
+ */ + drm_mm_init(&hvs->dlist_mm, + HVS_BOOTLOADER_DLIST_END, + (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END); + + /* Set up the HVS LBM memory manager. We could have some more + * complicated data structure that allowed reuse of LBM areas + * between planes when they don't overlap on the screen, but + * for now we just allocate globally. + */ + if (!vc4->is_vc5) + /* 48k words of 2x12-bit pixels */ + drm_mm_init(&hvs->lbm_mm, 0, 48 * 1024); + else + /* 60k words of 4x12-bit pixels */ + drm_mm_init(&hvs->lbm_mm, 0, 60 * 1024); + + vc4->hvs = hvs; + + return hvs; +} + +static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct drm_device *drm = dev_get_drvdata(master); + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_hvs *hvs = NULL; + int ret; + u32 dispctrl; + u32 reg, top; + + hvs = __vc4_hvs_alloc(vc4, NULL); + if (IS_ERR(hvs)) + return PTR_ERR(hvs); + + hvs->regs = vc4_ioremap_regs(pdev, 0); + if (IS_ERR(hvs->regs)) + return PTR_ERR(hvs->regs); + + hvs->regset.base = hvs->regs; + hvs->regset.regs = hvs_regs; + hvs->regset.nregs = ARRAY_SIZE(hvs_regs); + + if (vc4->is_vc5) { + struct rpi_firmware *firmware; + struct device_node *node; + unsigned int max_rate; + + node = rpi_firmware_find_node(); + if (!node) + return -EINVAL; + + firmware = rpi_firmware_get(node); + of_node_put(node); + if (!firmware) + return -EPROBE_DEFER; + + hvs->core_clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(hvs->core_clk)) { + dev_err(&pdev->dev, "Couldn't get core clock\n"); + return PTR_ERR(hvs->core_clk); + } + + max_rate = rpi_firmware_clk_get_max_rate(firmware, + RPI_FIRMWARE_CORE_CLK_ID); + rpi_firmware_put(firmware); + if (max_rate >= 550000000) + hvs->vc5_hdmi_enable_hdmi_20 = true; + + if (max_rate >= 600000000) + hvs->vc5_hdmi_enable_4096by2160 = true; + + hvs->max_core_rate = max_rate; + + ret = clk_prepare_enable(hvs->core_clk); + if (ret) { + dev_err(&pdev->dev, 
"Couldn't enable the core clock\n"); + return ret; + } + } + + if (!vc4->is_vc5) + hvs->dlist = hvs->regs + SCALER_DLIST_START; + else + hvs->dlist = hvs->regs + SCALER5_DLIST_START; + + /* Upload filter kernels. We only have the one for now, so we + * keep it around for the lifetime of the driver. + */ + ret = vc4_hvs_upload_linear_kernel(hvs, + &hvs->mitchell_netravali_filter, + mitchell_netravali_1_3_1_3_kernel); + if (ret) + return ret; + + reg = HVS_READ(SCALER_DISPECTRL); + reg &= ~SCALER_DISPECTRL_DSP2_MUX_MASK; + HVS_WRITE(SCALER_DISPECTRL, + reg | VC4_SET_FIELD(0, SCALER_DISPECTRL_DSP2_MUX)); + + reg = HVS_READ(SCALER_DISPCTRL); + reg &= ~SCALER_DISPCTRL_DSP3_MUX_MASK; + HVS_WRITE(SCALER_DISPCTRL, + reg | VC4_SET_FIELD(3, SCALER_DISPCTRL_DSP3_MUX)); + + reg = HVS_READ(SCALER_DISPEOLN); + reg &= ~SCALER_DISPEOLN_DSP4_MUX_MASK; + HVS_WRITE(SCALER_DISPEOLN, + reg | VC4_SET_FIELD(3, SCALER_DISPEOLN_DSP4_MUX)); + + reg = HVS_READ(SCALER_DISPDITHER); + reg &= ~SCALER_DISPDITHER_DSP5_MUX_MASK; + HVS_WRITE(SCALER_DISPDITHER, + reg | VC4_SET_FIELD(3, SCALER_DISPDITHER_DSP5_MUX)); + + dispctrl = HVS_READ(SCALER_DISPCTRL); + + dispctrl |= SCALER_DISPCTRL_ENABLE; + dispctrl |= SCALER_DISPCTRL_DISPEIRQ(0) | + SCALER_DISPCTRL_DISPEIRQ(1) | + SCALER_DISPCTRL_DISPEIRQ(2); + + if (!vc4->is_vc5) + dispctrl &= ~(SCALER_DISPCTRL_DMAEIRQ | + SCALER_DISPCTRL_SLVWREIRQ | + SCALER_DISPCTRL_SLVRDEIRQ | + SCALER_DISPCTRL_DSPEIEOF(0) | + SCALER_DISPCTRL_DSPEIEOF(1) | + SCALER_DISPCTRL_DSPEIEOF(2) | + SCALER_DISPCTRL_DSPEIEOLN(0) | + SCALER_DISPCTRL_DSPEIEOLN(1) | + SCALER_DISPCTRL_DSPEIEOLN(2) | + SCALER_DISPCTRL_DSPEISLUR(0) | + SCALER_DISPCTRL_DSPEISLUR(1) | + SCALER_DISPCTRL_DSPEISLUR(2) | + SCALER_DISPCTRL_SCLEIRQ); + else + dispctrl &= ~(SCALER_DISPCTRL_DMAEIRQ | + SCALER5_DISPCTRL_SLVEIRQ | + SCALER5_DISPCTRL_DSPEIEOF(0) | + SCALER5_DISPCTRL_DSPEIEOF(1) | + SCALER5_DISPCTRL_DSPEIEOF(2) | + SCALER5_DISPCTRL_DSPEIEOLN(0) | + SCALER5_DISPCTRL_DSPEIEOLN(1) | + 
SCALER5_DISPCTRL_DSPEIEOLN(2) | + SCALER5_DISPCTRL_DSPEISLUR(0) | + SCALER5_DISPCTRL_DSPEISLUR(1) | + SCALER5_DISPCTRL_DSPEISLUR(2) | + SCALER_DISPCTRL_SCLEIRQ); + + + /* Set AXI panic mode. + * VC4 panics when < 2 lines in FIFO. + * VC5 panics when less than 1 line in the FIFO. + */ + dispctrl &= ~(SCALER_DISPCTRL_PANIC0_MASK | + SCALER_DISPCTRL_PANIC1_MASK | + SCALER_DISPCTRL_PANIC2_MASK); + dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC0); + dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC1); + dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_PANIC2); + + HVS_WRITE(SCALER_DISPCTRL, dispctrl); + + /* Recompute Composite Output Buffer (COB) allocations for the displays + */ + if (!vc4->is_vc5) { + /* The COB is 20736 pixels, or just over 10 lines at 2048 wide. + * The bottom 2048 pixels are full 32bpp RGBA (intended for the + * TXP composing RGBA to memory), whilst the remainder are only + * 24bpp RGB. + * + * Assign 3 lines to channels 1 & 2, and just over 4 lines to + * channel 0. + */ + #define VC4_COB_SIZE 20736 + #define VC4_COB_LINE_WIDTH 2048 + #define VC4_COB_NUM_LINES 3 + reg = 0; + top = VC4_COB_LINE_WIDTH * VC4_COB_NUM_LINES; + reg |= (top - 1) << 16; + HVS_WRITE(SCALER_DISPBASE2, reg); + reg = top; + top += VC4_COB_LINE_WIDTH * VC4_COB_NUM_LINES; + reg |= (top - 1) << 16; + HVS_WRITE(SCALER_DISPBASE1, reg); + reg = top; + top = VC4_COB_SIZE; + reg |= (top - 1) << 16; + HVS_WRITE(SCALER_DISPBASE0, reg); + } else { + /* The COB is 44416 pixels, or 10.8 lines at 4096 wide. + * The bottom 4096 pixels are full RGBA (intended for the TXP + * composing RGBA to memory), whilst the remainder are only + * RGB. Addressing is always pixel wide. + * + * Assign 3 lines of 4096 to channels 1 & 2, and just over 4 + * lines. to channel 0. 
+ */ + #define VC5_COB_SIZE 44416 + #define VC5_COB_LINE_WIDTH 4096 + #define VC5_COB_NUM_LINES 3 + reg = 0; + top = VC5_COB_LINE_WIDTH * VC5_COB_NUM_LINES; + reg |= top << 16; + HVS_WRITE(SCALER_DISPBASE2, reg); + top += 16; + reg = top; + top += VC5_COB_LINE_WIDTH * VC5_COB_NUM_LINES; + reg |= top << 16; + HVS_WRITE(SCALER_DISPBASE1, reg); + top += 16; + reg = top; + top = VC5_COB_SIZE; + reg |= top << 16; + HVS_WRITE(SCALER_DISPBASE0, reg); + } + + ret = devm_request_irq(dev, platform_get_irq(pdev, 0), + vc4_hvs_irq_handler, 0, "vc4 hvs", drm); + if (ret) + return ret; + + return 0; +} + +static void vc4_hvs_unbind(struct device *dev, struct device *master, + void *data) +{ + struct drm_device *drm = dev_get_drvdata(master); + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_hvs *hvs = vc4->hvs; + struct drm_mm_node *node, *next; + + if (drm_mm_node_allocated(&vc4->hvs->mitchell_netravali_filter)) + drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter); + + drm_mm_for_each_node_safe(node, next, &vc4->hvs->dlist_mm) + drm_mm_remove_node(node); + + drm_mm_takedown(&vc4->hvs->dlist_mm); + + drm_mm_for_each_node_safe(node, next, &vc4->hvs->lbm_mm) + drm_mm_remove_node(node); + drm_mm_takedown(&vc4->hvs->lbm_mm); + + clk_disable_unprepare(hvs->core_clk); + + vc4->hvs = NULL; +} + +static const struct component_ops vc4_hvs_ops = { + .bind = vc4_hvs_bind, + .unbind = vc4_hvs_unbind, +}; + +static int vc4_hvs_dev_probe(struct platform_device *pdev) +{ + return component_add(&pdev->dev, &vc4_hvs_ops); +} + +static void vc4_hvs_dev_remove(struct platform_device *pdev) +{ + component_del(&pdev->dev, &vc4_hvs_ops); +} + +static const struct of_device_id vc4_hvs_dt_match[] = { + { .compatible = "brcm,bcm2711-hvs" }, + { .compatible = "brcm,bcm2835-hvs" }, + {} +}; + +struct platform_driver vc4_hvs_driver = { + .probe = vc4_hvs_dev_probe, + .remove_new = vc4_hvs_dev_remove, + .driver = { + .name = "vc4_hvs", + .of_match_table = vc4_hvs_dt_match, + }, +}; |