diff options
author | Matthew Brost <matthew.brost@intel.com> | 2024-11-08 15:23:44 -0800 |
---|---|---|
committer | Matthew Brost <matthew.brost@intel.com> | 2024-11-08 15:23:44 -0800 |
commit | 796a0b364badf22f3b05c7b9804b96aa995e1fb3 (patch) | |
tree | 99672b37118114e90b9e29d4b0774c8303a84cd8 | |
parent | 44313f3c5ad94d8526a5fdb98dac476fe1ceee5f (diff) |
2024y-11m-08d-23h-22m-50s UTC: drm-tip rerere cache update
git version 2.34.1
4 files changed, 0 insertions, 7708 deletions
diff --git a/rr-cache/41549dd6cc337627199acbe6749c5685c7e927d3/preimage.6 b/rr-cache/41549dd6cc337627199acbe6749c5685c7e927d3/preimage.6 deleted file mode 100644 index 232d29e9a561..000000000000 --- a/rr-cache/41549dd6cc337627199acbe6749c5685c7e927d3/preimage.6 +++ /dev/null @@ -1,2234 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include <linux/dma-fence-chain.h> - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... - */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? - */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. - * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. - */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. - */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. */ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. - */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. - */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. - */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. - */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. - * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. - */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (xe_vma_is_userptr(vma)) - lockdep_assert_held_read(&vm->userptr.notifier_lock); - else if (!xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) { - kfree(entries[i].pt_entries); - continue; - } - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - kfree(entries[i].pt_entries); - } -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%u entries to update\n", num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - uvma->userptr.divisor = divisor << 1; - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -/** - * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks - * @base: Base we derive from. - * @bind: Whether this is a bind or an unbind operation. A bind operation - * makes the pre-commit callback error with -EAGAIN if it detects a - * pending invalidation. - * @locked: Whether the pre-commit callback locked the userptr notifier lock - * and it needs unlocking. - */ -struct xe_pt_migrate_pt_update { - struct xe_migrate_pt_update base; - bool bind; - bool locked; -}; - -/* - * This function adds the needed dependencies to a page-table update job - * to make sure racing jobs for separate bind engines don't race writing - * to the same page-table range, wreaking havoc. Initially use a single - * fence for the entire VM. An optimization would use smaller granularity. - */ -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_range_fence_tree *rftree, - u64 start, u64 last) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - int err; - - rtfence = xe_range_fence_tree_first(rftree, start, last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, start, last); - } - - return 0; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_range_fence_tree *rftree = - &xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, rftree, - pt_update->start, pt_update->last); -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_pt_migrate_pt_update *userptr_update = - container_of(pt_update, typeof(*userptr_update), base); - struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma); - unsigned long notifier_seq = uvma->userptr.notifier_seq; - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - int err = xe_pt_vm_dependencies(pt_update->job, - &vm->rftree[pt_update->tile_id], - pt_update->start, - pt_update->last); - - if (err) - return err; - - userptr_update->locked = false; - - /* - * Wait until nobody is running the invalidation notifier, and - * since we're exiting the loop holding the notifier lock, - * nobody can proceed invalidating either. - * - * Note that we don't update the vma->userptr.notifier_seq since - * we don't update the userptr pages. - */ - do { - down_read(&vm->userptr.notifier_lock); - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq)) - break; - - up_read(&vm->userptr.notifier_lock); - - if (userptr_update->bind) - return -EAGAIN; - - notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier); - } while (true); - - /* Inject errors to test_whether they are handled correctly */ - if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) { - up_read(&vm->userptr.notifier_lock); - return -EAGAIN; - } - - userptr_update->locked = true; - - return 0; -} - -static const struct xe_migrate_pt_update_ops bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -static void xe_pt_calc_rfence_interval(struct xe_vma *vma, - struct xe_pt_migrate_pt_update *update, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - int i, level = 0; - - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - update->base.start = ALIGN_DOWN(xe_vma_start(vma), - 0x1ull << xe_pt_shift(level)); - update->base.last = ALIGN(xe_vma_end(vma), - 0x1ull << xe_pt_shift(level)) - 1; -} - -/** - * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma - * address range. - * @tile: The tile to bind for. - * @vma: The vma to bind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before binding the built tree to the live vm tree. - * @num_syncs: Number of @sync entries. - * @rebind: Whether we're rebinding this vma to the same address range without - * an unbind in-between. - * - * This function builds a page-table tree (see xe_pt_stage_bind() for more - * information on page-table building), and the xe_vm_pgtable_update entries - * abstracting the operations needed to attach it to the main vm tree. It - * then takes the relevant locks and updates the metadata side of the main - * vm tree and submits the operations for pipelined attachment of the - * gpu page-table to the vm main tree, (which can be done either by the - * cpu and the GPU). - * - * Return: A valid dma-fence representing the pipelined attachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool rebind) -{ - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update bind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops, - .vma = vma, - .tile_id = tile->id, - }, - .bind = true, - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; - struct xe_range_fence *rfence; - int err; - - bind_pt_update.locked = false; - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); - if (err) - goto err; - - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - goto err; - - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries, - num_entries); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. on LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * on !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); - } else if (rebind && !xe_vm_in_lr_mode(vm)) { - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - fence = xe_migrate_update_pgtables(tile->migrate, - vm, xe_vma_bo(vma), q, - entries, num_entries, - syncs, num_syncs, - &bind_pt_update.base); - if (!IS_ERR(fence)) { - bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND; - LLIST_HEAD(deferred); - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - bind_pt_update.base.start, - bind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - - /* TLB invalidation must be done before signaling rebind */ - if (ifence) { - int err = invalidation_fence_init(tile->primary_gt, - ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - } - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || - last_munmap_rebind ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_bind(vma, entries, num_entries, rebind, - bind_pt_update.locked ? &deferred : NULL); - - /* This vma is live (again?) now */ - vma->tile_present |= BIT(tile->id); - - if (bind_pt_update.locked) { - to_userptr_vma(vma)->userptr.initial_bind = true; - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - if (!rebind && last_munmap_rebind && - xe_vm_in_preempt_fence_mode(vm)) - xe_vm_queue_rebind_worker(vm); - } else { - kfree(rfence); - kfree(ifence); - if (bind_pt_update.locked) - up_read(&vm->userptr.notifier_lock); - xe_pt_abort_bind(vma, entries, num_entries); - } - - return fence; - -err: - return ERR_PTR(err); -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. - */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. */ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. - */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vma *vma = pt_update->vma; - u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -<<<<<<< -static const struct xe_migrate_pt_update_ops unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, -======= -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_prepare_bind(vma, pt_op->entries, - pt_op->num_entries, pt_op->rebind); - } else { - xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. - */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence, - struct dma_fence *fence2) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence, - fence2); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence, fence2); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence, fence2); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, ->>>>>>> - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma - * address range. - * @tile: The tile to unbind for. - * @vma: The vma to unbind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before disconnecting the tree to be destroyed. - * @num_syncs: Number of @sync entries. - * - * This function builds a the xe_vm_pgtable_update entries abstracting the - * operations needed to detach the page-table tree to be destroyed from the - * man vm tree. - * It then takes the relevant locks and submits the operations for - * pipelined detachment of the gpu page-table from the vm main tree, - * (which can be done either by the cpu and the GPU), Finally it frees the - * detached page-table tree. - * - * Return: A valid dma-fence representing the pipelined detachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs) -{ -<<<<<<< - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence_chain *chain_fence = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0, i; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, -======= - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update unbind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops : - &unbind_ops, - .vma = vma, - .tile_id = tile->id, - }, ->>>>>>> - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence = NULL; - struct invalidation_fence *ifence; - struct xe_range_fence *rfence; - int err; - - LLIST_HEAD(deferred); - - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - num_entries = xe_pt_stage_unbind(tile, vma, entries); - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, - num_entries); - -<<<<<<< - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - return ERR_PTR(err); - - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); -======= - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) { - err = -ENOMEM; - goto kill_vm_tile1; - } - if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - chain_fence = dma_fence_chain_alloc(); - if (!chain_fence) { - err = -ENOMEM; - goto free_ifence; - } - } - } ->>>>>>> - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - /* - * Even if we were already evicted and unbind to destroy, we need to - * clear again here. The eviction may have updated pagetables at a - * lower level, because it needs to be more conservative. - */ - fence = xe_migrate_update_pgtables(tile->migrate, - vm, NULL, q ? q : - vm->q[tile->id], - entries, num_entries, - syncs, num_syncs, - &unbind_pt_update.base); - if (!IS_ERR(fence)) { - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - unbind_pt_update.base.start, - unbind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - -<<<<<<< - /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, - DMA_RESV_USAGE_BOOKKEEP); - - /* This fence will be installed by caller when doing eviction */ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_unbind(vma, entries, num_entries, - unbind_pt_update.locked ? &deferred : NULL); - vma->tile_present &= ~BIT(tile->id); - } else { - kfree(rfence); - kfree(ifence); - } - - if (!vma->tile_present) - list_del_init(&vma->combined_links.rebind); - - if (unbind_pt_update.locked) { - xe_tile_assert(tile, xe_vma_is_userptr(vma)); -======= - xe_pt_commit(pt_op->vma, pt_op->entries, - pt_op->num_entries, &pt_update_ops->deferred); - pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */ - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - dma_fence_chain_init(chain_fence, &ifence->base.base, - &mfence->base.base, 0); - fence = &chain_fence->base; - } else { - fence = &ifence->base.base; - } - } - - if (!mfence) { - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); - } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); - } ->>>>>>> - - if (!vma->tile_present) { - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - - return fence; -<<<<<<< -======= - -free_rfence: - kfree(rfence); -free_ifence: - dma_fence_chain_free(chain_fence); - kfree(mfence); - kfree(ifence); -kill_vm_tile1: - if (err != -EAGAIN && tile->id) - xe_vm_kill(vops->vm, false); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = 0; i < pt_update_ops->current_op; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = pt_update_ops->num_ops - 1; i >= 0; --i) { - struct xe_vm_pgtable_update_op *pt_op = - &pt_update_ops->ops[i]; - - if (!pt_op->vma || i >= pt_update_ops->current_op) - continue; - - if (pt_op->bind) - xe_pt_abort_bind(pt_op->vma, pt_op->entries, - pt_op->num_entries, - pt_op->rebind); - else - xe_pt_abort_unbind(pt_op->vma, pt_op->entries, - pt_op->num_entries); - } - - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); ->>>>>>> -} diff --git a/rr-cache/4951c0e45d299a9570812ec9f1cc27e11aa21d6e/preimage.4 b/rr-cache/4951c0e45d299a9570812ec9f1cc27e11aa21d6e/preimage.4 deleted file mode 100644 index bb6eadffff5e..000000000000 --- a/rr-cache/4951c0e45d299a9570812ec9f1cc27e11aa21d6e/preimage.4 +++ /dev/null @@ -1,2248 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include <linux/dma-fence-array.h> - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... - */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? - */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. - * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. - */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. - */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. */ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. - */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. - */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. - */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. - */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. - * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. - */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (xe_vma_is_userptr(vma)) - lockdep_assert_held_read(&vm->userptr.notifier_lock); - else if (!xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) { - kfree(entries[i].pt_entries); - continue; - } - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - kfree(entries[i].pt_entries); - } -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%u entries to update\n", num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - uvma->userptr.divisor = divisor << 1; - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -/** - * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks - * @base: Base we derive from. - * @bind: Whether this is a bind or an unbind operation. A bind operation - * makes the pre-commit callback error with -EAGAIN if it detects a - * pending invalidation. - * @locked: Whether the pre-commit callback locked the userptr notifier lock - * and it needs unlocking. - */ -struct xe_pt_migrate_pt_update { - struct xe_migrate_pt_update base; - bool bind; - bool locked; -}; - -/* - * This function adds the needed dependencies to a page-table update job - * to make sure racing jobs for separate bind engines don't race writing - * to the same page-table range, wreaking havoc. Initially use a single - * fence for the entire VM. An optimization would use smaller granularity. - */ -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_range_fence_tree *rftree, - u64 start, u64 last) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - int err; - - rtfence = xe_range_fence_tree_first(rftree, start, last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, start, last); - } - - return 0; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_range_fence_tree *rftree = - &xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, rftree, - pt_update->start, pt_update->last); -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_pt_migrate_pt_update *userptr_update = - container_of(pt_update, typeof(*userptr_update), base); - struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma); - unsigned long notifier_seq = uvma->userptr.notifier_seq; - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - int err = xe_pt_vm_dependencies(pt_update->job, - &vm->rftree[pt_update->tile_id], - pt_update->start, - pt_update->last); - - if (err) - return err; - - userptr_update->locked = false; - - /* - * Wait until nobody is running the invalidation notifier, and - * since we're exiting the loop holding the notifier lock, - * nobody can proceed invalidating either. - * - * Note that we don't update the vma->userptr.notifier_seq since - * we don't update the userptr pages. - */ - do { - down_read(&vm->userptr.notifier_lock); - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq)) - break; - - up_read(&vm->userptr.notifier_lock); - - if (userptr_update->bind) - return -EAGAIN; - - notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier); - } while (true); - - /* Inject errors to test_whether they are handled correctly */ - if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) { - up_read(&vm->userptr.notifier_lock); - return -EAGAIN; - } - - userptr_update->locked = true; - - return 0; -} - -static const struct xe_migrate_pt_update_ops bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -static void xe_pt_calc_rfence_interval(struct xe_vma *vma, - struct xe_pt_migrate_pt_update *update, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - int i, level = 0; - - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - update->base.start = ALIGN_DOWN(xe_vma_start(vma), - 0x1ull << xe_pt_shift(level)); - update->base.last = ALIGN(xe_vma_end(vma), - 0x1ull << xe_pt_shift(level)) - 1; -} - -/** - * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma - * address range. - * @tile: The tile to bind for. - * @vma: The vma to bind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before binding the built tree to the live vm tree. - * @num_syncs: Number of @sync entries. - * @rebind: Whether we're rebinding this vma to the same address range without - * an unbind in-between. - * - * This function builds a page-table tree (see xe_pt_stage_bind() for more - * information on page-table building), and the xe_vm_pgtable_update entries - * abstracting the operations needed to attach it to the main vm tree. It - * then takes the relevant locks and updates the metadata side of the main - * vm tree and submits the operations for pipelined attachment of the - * gpu page-table to the vm main tree, (which can be done either by the - * cpu and the GPU). - * - * Return: A valid dma-fence representing the pipelined attachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool rebind) -{ - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update bind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops, - .vma = vma, - .tile_id = tile->id, - }, - .bind = true, - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; - struct xe_range_fence *rfence; - int err; - - bind_pt_update.locked = false; - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); - if (err) - goto err; - - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - goto err; - - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries, - num_entries); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. on LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * on !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); - } else if (rebind && !xe_vm_in_lr_mode(vm)) { - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - fence = xe_migrate_update_pgtables(tile->migrate, - vm, xe_vma_bo(vma), q, - entries, num_entries, - syncs, num_syncs, - &bind_pt_update.base); - if (!IS_ERR(fence)) { - bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND; - LLIST_HEAD(deferred); - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - bind_pt_update.base.start, - bind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - - /* TLB invalidation must be done before signaling rebind */ - if (ifence) { - int err = invalidation_fence_init(tile->primary_gt, - ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - } - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || - last_munmap_rebind ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_bind(vma, entries, num_entries, rebind, - bind_pt_update.locked ? &deferred : NULL); - - /* This vma is live (again?) now */ - vma->tile_present |= BIT(tile->id); - - if (bind_pt_update.locked) { - to_userptr_vma(vma)->userptr.initial_bind = true; - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - if (!rebind && last_munmap_rebind && - xe_vm_in_preempt_fence_mode(vm)) - xe_vm_queue_rebind_worker(vm); - } else { - kfree(rfence); - kfree(ifence); - if (bind_pt_update.locked) - up_read(&vm->userptr.notifier_lock); - xe_pt_abort_bind(vma, entries, num_entries); - } - - return fence; - -err: - return ERR_PTR(err); -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. - */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. */ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. - */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vma *vma = pt_update->vma; - u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -<<<<<<< -static const struct xe_migrate_pt_update_ops unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, -======= -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0; - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count << shift); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_prepare_bind(vma, pt_op->entries, - pt_op->num_entries, pt_op->rebind); - } else { - xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. - */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int shift = tile->media_gt ? 1 : 0; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count << shift); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence, - struct dma_fence *fence2) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence, - fence2); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence, fence2); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence, fence2); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, ->>>>>>> - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma - * address range. - * @tile: The tile to unbind for. - * @vma: The vma to unbind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before disconnecting the tree to be destroyed. - * @num_syncs: Number of @sync entries. - * - * This function builds a the xe_vm_pgtable_update entries abstracting the - * operations needed to detach the page-table tree to be destroyed from the - * man vm tree. - * It then takes the relevant locks and submits the operations for - * pipelined detachment of the gpu page-table from the vm main tree, - * (which can be done either by the cpu and the GPU), Finally it frees the - * detached page-table tree. - * - * Return: A valid dma-fence representing the pipelined detachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs) -{ -<<<<<<< - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0, i; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, -======= - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update unbind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops : - &unbind_ops, - .vma = vma, - .tile_id = tile->id, - }, ->>>>>>> - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence = NULL; - struct invalidation_fence *ifence; - struct xe_range_fence *rfence; - int err; - - LLIST_HEAD(deferred); - - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - num_entries = xe_pt_stage_unbind(tile, vma, entries); - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, - num_entries); - -<<<<<<< - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - return ERR_PTR(err); - - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); -======= - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) { - err = -ENOMEM; - goto kill_vm_tile1; - } - if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); - if (!fences) { - err = -ENOMEM; - goto free_ifence; - } - cf = dma_fence_array_alloc(2); - if (!cf) { - err = -ENOMEM; - goto free_ifence; - } - } - } ->>>>>>> - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - /* - * Even if we were already evicted and unbind to destroy, we need to - * clear again here. The eviction may have updated pagetables at a - * lower level, because it needs to be more conservative. - */ - fence = xe_migrate_update_pgtables(tile->migrate, - vm, NULL, q ? q : - vm->q[tile->id], - entries, num_entries, - syncs, num_syncs, - &unbind_pt_update.base); - if (!IS_ERR(fence)) { - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - unbind_pt_update.base.start, - unbind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - -<<<<<<< - /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, - DMA_RESV_USAGE_BOOKKEEP); - - /* This fence will be installed by caller when doing eviction */ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_unbind(vma, entries, num_entries, - unbind_pt_update.locked ? &deferred : NULL); - vma->tile_present &= ~BIT(tile->id); - } else { - kfree(rfence); - kfree(ifence); - } - - if (!vma->tile_present) - list_del_init(&vma->combined_links.rebind); - - if (unbind_pt_update.locked) { - xe_tile_assert(tile, xe_vma_is_userptr(vma)); -======= - xe_pt_commit(pt_op->vma, pt_op->entries, - pt_op->num_entries, &pt_update_ops->deferred); - pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */ - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - fences[0] = &ifence->base.base; - fences[1] = &mfence->base.base; - dma_fence_array_init(cf, 2, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - fence = &cf->base; - } else { - fence = &ifence->base.base; - } - } - - if (!mfence) { - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); - } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); - } ->>>>>>> - - if (!vma->tile_present) { - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - - return fence; -<<<<<<< -======= - -free_rfence: - kfree(rfence); -free_ifence: - kfree(cf); - kfree(fences); - kfree(mfence); - kfree(ifence); -kill_vm_tile1: - if (err != -EAGAIN && tile->id) - xe_vm_kill(vops->vm, false); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = 0; i < pt_update_ops->current_op; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = pt_update_ops->num_ops - 1; i >= 0; --i) { - struct xe_vm_pgtable_update_op *pt_op = - &pt_update_ops->ops[i]; - - if (!pt_op->vma || i >= pt_update_ops->current_op) - continue; - - if (pt_op->bind) - xe_pt_abort_bind(pt_op->vma, pt_op->entries, - pt_op->num_entries, - pt_op->rebind); - else - xe_pt_abort_unbind(pt_op->vma, pt_op->entries, - pt_op->num_entries); - } - - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); ->>>>>>> -} diff --git a/rr-cache/6e989852f5454d81ebf331bbd2c55116dc711575/preimage.8 b/rr-cache/6e989852f5454d81ebf331bbd2c55116dc711575/preimage.8 deleted file mode 100644 index 42ffb0f8cccc..000000000000 --- a/rr-cache/6e989852f5454d81ebf331bbd2c55116dc711575/preimage.8 +++ /dev/null @@ -1,982 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include "xe_exec_queue.h" - -#include <linux/nospec.h> - -#include <drm/drm_device.h> -#include <drm/drm_file.h> -#include <drm/xe_drm.h> - -#include "xe_device.h" -#include "xe_gt.h" -#include "xe_hw_engine_class_sysfs.h" -#include "xe_hw_fence.h" -#include "xe_lrc.h" -#include "xe_macros.h" -#include "xe_migrate.h" -#include "xe_pm.h" -#include "xe_ring_ops_types.h" -#include "xe_trace.h" -#include "xe_vm.h" - -enum xe_exec_queue_sched_prop { - XE_EXEC_QUEUE_JOB_TIMEOUT = 0, - XE_EXEC_QUEUE_TIMESLICE = 1, - XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2, - XE_EXEC_QUEUE_SCHED_PROP_MAX = 3, -}; - -static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number); - -static void __xe_exec_queue_free(struct xe_exec_queue *q) -{ - if (q->vm) - xe_vm_put(q->vm); - - if (q->xef) - xe_file_put(q->xef); - - kfree(q); -} - -static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, - struct xe_vm *vm, - u32 logical_mask, - u16 width, struct xe_hw_engine *hwe, - u32 flags, u64 extensions) -{ - struct xe_exec_queue *q; - struct xe_gt *gt = hwe->gt; - int err; - - /* only kernel queues can be permanent */ - XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL)); - - q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL); - if (!q) - return ERR_PTR(-ENOMEM); - - kref_init(&q->refcount); - q->flags = flags; - q->hwe = hwe; - q->gt = gt; - q->class = hwe->class; - q->width = width; - q->logical_mask = logical_mask; - q->fence_irq = >->fence_irq[hwe->class]; - q->ring_ops = gt->ring_ops[hwe->class]; - q->ops = gt->exec_queue_ops; - INIT_LIST_HEAD(&q->lr.link); - INIT_LIST_HEAD(&q->multi_gt_link); - - q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; - q->sched_props.preempt_timeout_us = - hwe->eclass->sched_props.preempt_timeout_us; - q->sched_props.job_timeout_ms = - hwe->eclass->sched_props.job_timeout_ms; - if (q->flags & EXEC_QUEUE_FLAG_KERNEL && - q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY) - q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; - else - q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; - - if (vm) - q->vm = xe_vm_get(vm); - - if (extensions) { - /* - * may set q->usm, must come before xe_lrc_create(), - * may overwrite q->sched_props, must come before q->ops->init() - */ - err = exec_queue_user_extensions(xe, q, extensions, 0); - if (err) { - __xe_exec_queue_free(q); - return ERR_PTR(err); - } - } - - return q; -} - -static int __xe_exec_queue_init(struct xe_exec_queue *q) -{ - struct xe_vm *vm = q->vm; - int i, err; - - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - return err; - } - - for (i = 0; i < q->width; ++i) { - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); - if (IS_ERR(q->lrc[i])) { - err = PTR_ERR(q->lrc[i]); - goto err_unlock; - } - } - - if (vm) - xe_vm_unlock(vm); - - err = q->ops->init(q); - if (err) - goto err_lrc; - - return 0; - -err_unlock: - if (vm) - xe_vm_unlock(vm); -err_lrc: - for (i = i - 1; i >= 0; --i) - xe_lrc_put(q->lrc[i]); - return err; -} - -struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, - u32 logical_mask, u16 width, - struct xe_hw_engine *hwe, u32 flags, - u64 extensions) -{ - struct xe_exec_queue *q; - int err; - - q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags, - extensions); - if (IS_ERR(q)) - return q; - - err = __xe_exec_queue_init(q); - if (err) - goto err_post_alloc; - - return q; - -err_post_alloc: - __xe_exec_queue_free(q); - return ERR_PTR(err); -} - -struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, - struct xe_vm *vm, - enum xe_engine_class class, - u32 flags, u64 extensions) -{ - struct xe_hw_engine *hwe, *hwe0 = NULL; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == class) { - logical_mask |= BIT(hwe->logical_instance); - if (!hwe0) - hwe0 = hwe; - } - } - - if (!logical_mask) - return ERR_PTR(-ENODEV); - - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions); -} - -/** - * xe_exec_queue_create_bind() - Create bind exec queue. - * @xe: Xe device. - * @tile: tile which bind exec queue belongs to. - * @flags: exec queue creation flags - * @extensions: exec queue creation extensions - * - * Normalize bind exec queue creation. Bind exec queue is tied to migration VM - * for access to physical memory required for page table programming. On a - * faulting devices the reserved copy engine instance must be used to avoid - * deadlocking (user binds cannot get stuck behind faults as kernel binds which - * resolve faults depend on user binds). On non-faulting devices any copy engine - * can be used. - * - * Returns exec queue on success, ERR_PTR on failure - */ -struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, - struct xe_tile *tile, - u32 flags, u64 extensions) -{ - struct xe_gt *gt = tile->primary_gt; - struct xe_exec_queue *q; - struct xe_vm *migrate_vm; - - migrate_vm = xe_migrate_get_vm(tile->migrate); - if (xe->info.has_usm) { - struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, - XE_ENGINE_CLASS_COPY, - gt->usm.reserved_bcs_instance, - false); - - if (!hwe) - return ERR_PTR(-EINVAL); - - q = xe_exec_queue_create(xe, migrate_vm, - BIT(hwe->logical_instance), 1, hwe, - flags, extensions); - } else { - q = xe_exec_queue_create_class(xe, gt, migrate_vm, - XE_ENGINE_CLASS_COPY, flags, - extensions); - } - xe_vm_put(migrate_vm); - - return q; -} - -void xe_exec_queue_destroy(struct kref *ref) -{ - struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); - struct xe_exec_queue *eq, *next; - - xe_exec_queue_last_fence_put_unlocked(q); - if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { - list_for_each_entry_safe(eq, next, &q->multi_gt_list, - multi_gt_link) - xe_exec_queue_put(eq); - } - - q->ops->fini(q); -} - -void xe_exec_queue_fini(struct xe_exec_queue *q) -{ - int i; - - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); - __xe_exec_queue_free(q); -} - -void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) -{ - switch (q->class) { - case XE_ENGINE_CLASS_RENDER: - snprintf(q->name, sizeof(q->name), "rcs%d", instance); - break; - case XE_ENGINE_CLASS_VIDEO_DECODE: - snprintf(q->name, sizeof(q->name), "vcs%d", instance); - break; - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - snprintf(q->name, sizeof(q->name), "vecs%d", instance); - break; - case XE_ENGINE_CLASS_COPY: - snprintf(q->name, sizeof(q->name), "bcs%d", instance); - break; - case XE_ENGINE_CLASS_COMPUTE: - snprintf(q->name, sizeof(q->name), "ccs%d", instance); - break; - case XE_ENGINE_CLASS_OTHER: - snprintf(q->name, sizeof(q->name), "gsccs%d", instance); - break; - default: - XE_WARN_ON(q->class); - } -} - -struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id) -{ - struct xe_exec_queue *q; - - mutex_lock(&xef->exec_queue.lock); - q = xa_load(&xef->exec_queue.xa, id); - if (q) - xe_exec_queue_get(q); - mutex_unlock(&xef->exec_queue.lock); - - return q; -} - -enum xe_exec_queue_priority -xe_exec_queue_device_get_max_priority(struct xe_device *xe) -{ - return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH : - XE_EXEC_QUEUE_PRIORITY_NORMAL; -} - -static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q, - u64 value) -{ - if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe))) - return -EPERM; - - q->sched_props.priority = value; - return 0; -} - -static bool xe_exec_queue_enforce_schedule_limit(void) -{ -#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) - return true; -#else - return !capable(CAP_SYS_NICE); -#endif -} - -static void -xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass, - enum xe_exec_queue_sched_prop prop, - u32 *min, u32 *max) -{ - switch (prop) { - case XE_EXEC_QUEUE_JOB_TIMEOUT: - *min = eclass->sched_props.job_timeout_min; - *max = eclass->sched_props.job_timeout_max; - break; - case XE_EXEC_QUEUE_TIMESLICE: - *min = eclass->sched_props.timeslice_min; - *max = eclass->sched_props.timeslice_max; - break; - case XE_EXEC_QUEUE_PREEMPT_TIMEOUT: - *min = eclass->sched_props.preempt_timeout_min; - *max = eclass->sched_props.preempt_timeout_max; - break; - default: - break; - } -#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) - if (capable(CAP_SYS_NICE)) { - switch (prop) { - case XE_EXEC_QUEUE_JOB_TIMEOUT: - *min = XE_HW_ENGINE_JOB_TIMEOUT_MIN; - *max = XE_HW_ENGINE_JOB_TIMEOUT_MAX; - break; - case XE_EXEC_QUEUE_TIMESLICE: - *min = XE_HW_ENGINE_TIMESLICE_MIN; - *max = XE_HW_ENGINE_TIMESLICE_MAX; - break; - case XE_EXEC_QUEUE_PREEMPT_TIMEOUT: - *min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; - *max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; - break; - default: - break; - } - } -#endif -} - -static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q, - u64 value) -{ - u32 min = 0, max = 0; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_TIMESLICE, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - q->sched_props.timeslice_us = value; - return 0; -} - -typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, - struct xe_exec_queue *q, - u64 value); - -static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, -}; - -static int exec_queue_user_ext_set_property(struct xe_device *xe, - struct xe_exec_queue *q, - u64 extension) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_set_property ext; - int err; - u32 idx; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, ext.property >= - ARRAY_SIZE(exec_queue_set_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad) || - XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && - ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) - return -EINVAL; - - idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); - if (!exec_queue_set_property_funcs[idx]) - return -EINVAL; - - return exec_queue_set_property_funcs[idx](xe, q, ext.value); -} - -typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe, - struct xe_exec_queue *q, - u64 extension); - -static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = { - [DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property, -}; - -#define MAX_USER_EXTENSIONS 16 -static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number) -{ - u64 __user *address = u64_to_user_ptr(extensions); - struct drm_xe_user_extension ext; - int err; - u32 idx; - - if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) - return -E2BIG; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, ext.pad) || - XE_IOCTL_DBG(xe, ext.name >= - ARRAY_SIZE(exec_queue_user_extension_funcs))) - return -EINVAL; - - idx = array_index_nospec(ext.name, - ARRAY_SIZE(exec_queue_user_extension_funcs)); - err = exec_queue_user_extension_funcs[idx](xe, q, extensions); - if (XE_IOCTL_DBG(xe, err)) - return err; - - if (ext.next_extension) - return exec_queue_user_extensions(xe, q, ext.next_extension, - ++ext_number); - - return 0; -} - -<<<<<<< -======= -static const enum xe_engine_class user_to_xe_engine_class[] = { - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, -}; - -static struct xe_hw_engine * -find_hw_engine(struct xe_device *xe, - struct drm_xe_engine_class_instance eci) -{ - u32 idx; - - if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) - return NULL; - - if (eci.gt_id >= xe->info.gt_count) - return NULL; - - idx = array_index_nospec(eci.engine_class, - ARRAY_SIZE(user_to_xe_engine_class)); - - return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), - user_to_xe_engine_class[idx], - eci.engine_instance, true); -} - -static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - if (XE_IOCTL_DBG(xe, width != 1)) - return 0; - if (XE_IOCTL_DBG(xe, num_placements != 1)) - return 0; - if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return 0; - - eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) - logical_mask |= BIT(hwe->logical_instance); - } - - return logical_mask; -} - ->>>>>>> -static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - int len = width * num_placements; - int i, j, n; - u16 class; - u16 gt_id; - u32 return_mask = 0, prev_mask; - - if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) && - len > 1)) - return 0; - - for (i = 0; i < width; ++i) { - u32 current_mask = 0; - - for (j = 0; j < num_placements; ++j) { - struct xe_hw_engine *hwe; - - n = j * width + i; - - hwe = find_hw_engine(xe, eci[n]); - if (XE_IOCTL_DBG(xe, !hwe)) - return 0; - - if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe))) - return 0; - - if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) || - XE_IOCTL_DBG(xe, n && eci[n].engine_class != class)) - return 0; - - class = eci[n].engine_class; - gt_id = eci[n].gt_id; - - if (width == 1 || !i) - return_mask |= BIT(eci[n].engine_instance); - current_mask |= BIT(eci[n].engine_instance); - } - - /* Parallel submissions must be logically contiguous */ - if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1)) - return 0; - - prev_mask = current_mask; - } - - return return_mask; -} - -int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_exec_queue_create *args = data; - struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; - struct drm_xe_engine_class_instance __user *user_eci = - u64_to_user_ptr(args->instances); - struct xe_hw_engine *hwe; - struct xe_vm *vm; - struct xe_gt *gt; - struct xe_tile *tile; - struct xe_exec_queue *q = NULL; - u32 logical_mask; - u32 id; - u32 len; - int err; - - if (XE_IOCTL_DBG(xe, args->flags) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - len = args->width * args->num_placements; - if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) - return -EINVAL; - - err = __copy_from_user(eci, user_eci, - sizeof(struct drm_xe_engine_class_instance) * - len); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) - return -EINVAL; - - if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { - if (XE_IOCTL_DBG(xe, args->width != 1) || - XE_IOCTL_DBG(xe, args->num_placements != 1) || - XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return -EINVAL; - - for_each_tile(tile, xe, id) { - struct xe_exec_queue *new; - u32 flags = EXEC_QUEUE_FLAG_VM; - - if (id) - flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; - -<<<<<<< - eci[0].gt_id = gt->info.id; - logical_mask = bind_exec_queue_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - /* The migration vm doesn't hold rpm ref */ - xe_pm_runtime_get_noresume(xe); - - flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); - - migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); - new = xe_exec_queue_create(xe, migrate_vm, logical_mask, - args->width, hwe, flags, - args->extensions); - - xe_pm_runtime_put(xe); /* now held by engine */ - - xe_vm_put(migrate_vm); -======= - new = xe_exec_queue_create_bind(xe, tile, flags, - args->extensions); ->>>>>>> - if (IS_ERR(new)) { - err = PTR_ERR(new); - if (q) - goto put_exec_queue; - return err; - } - if (id == 0) - q = new; - else - list_add_tail(&new->multi_gt_list, - &q->multi_gt_link); - } - } else { - gt = xe_device_get_gt(xe, eci[0].gt_id); - logical_mask = calc_validate_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_DBG(xe, !vm)) - return -ENOENT; - - err = down_read_interruptible(&vm->lock); - if (err) { - xe_vm_put(vm); - return err; - } - - if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { - up_read(&vm->lock); - xe_vm_put(vm); - return -ENOENT; - } - - q = xe_exec_queue_create(xe, vm, logical_mask, - args->width, hwe, 0, - args->extensions); - up_read(&vm->lock); - xe_vm_put(vm); - if (IS_ERR(q)) - return PTR_ERR(q); - - if (xe_vm_in_preempt_fence_mode(vm)) { - q->lr.context = dma_fence_context_alloc(1); - - err = xe_vm_add_compute_exec_queue(vm, q); - if (XE_IOCTL_DBG(xe, err)) - goto put_exec_queue; - } - } - - mutex_lock(&xef->exec_queue.lock); - err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->exec_queue.lock); - if (err) - goto kill_exec_queue; - - args->exec_queue_id = id; - q->xef = xe_file_get(xef); - - return 0; - -kill_exec_queue: - xe_exec_queue_kill(q); -put_exec_queue: - xe_exec_queue_put(q); - return err; -} - -int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_exec_queue_get_property *args = data; - struct xe_exec_queue *q; - int ret; - - if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - q = xe_exec_queue_lookup(xef, args->exec_queue_id); - if (XE_IOCTL_DBG(xe, !q)) - return -ENOENT; - - switch (args->property) { - case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN: - args->value = q->ops->reset_status(q); - ret = 0; - break; - default: - ret = -EINVAL; - } - - xe_exec_queue_put(q); - - return ret; -} - -/** - * xe_exec_queue_is_lr() - Whether an exec_queue is long-running - * @q: The exec_queue - * - * Return: True if the exec_queue is long-running, false otherwise. - */ -bool xe_exec_queue_is_lr(struct xe_exec_queue *q) -{ - return q->vm && xe_vm_in_lr_mode(q->vm) && - !(q->flags & EXEC_QUEUE_FLAG_VM); -} - -static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q) -{ - return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1; -} - -/** - * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full - * @q: The exec_queue - * - * Return: True if the exec_queue's ring is full, false otherwise. - */ -bool xe_exec_queue_ring_full(struct xe_exec_queue *q) -{ - struct xe_lrc *lrc = q->lrc[0]; - s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES; - - return xe_exec_queue_num_job_inflight(q) >= max_job; -} - -/** - * xe_exec_queue_is_idle() - Whether an exec_queue is idle. - * @q: The exec_queue - * - * FIXME: Need to determine what to use as the short-lived - * timeline lock for the exec_queues, so that the return value - * of this function becomes more than just an advisory - * snapshot in time. The timeline lock must protect the - * seqno from racing submissions on the same exec_queue. - * Typically vm->resv, but user-created timeline locks use the migrate vm - * and never grabs the migrate vm->resv so we have a race there. - * - * Return: True if the exec_queue is idle, false otherwise. - */ -bool xe_exec_queue_is_idle(struct xe_exec_queue *q) -{ - if (xe_exec_queue_is_parallel(q)) { - int i; - - for (i = 0; i < q->width; ++i) { - if (xe_lrc_seqno(q->lrc[i]) != - q->lrc[i]->fence_ctx.next_seqno - 1) - return false; - } - - return true; - } - - return xe_lrc_seqno(q->lrc[0]) == - q->lrc[0]->fence_ctx.next_seqno - 1; -} - -/** - * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue - * from hw - * @q: The exec queue - * - * Update the timestamp saved by HW for this exec queue and save run ticks - * calculated by using the delta from last update. - */ -void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) -{ - struct xe_file *xef; - struct xe_lrc *lrc; - u32 old_ts, new_ts; - - /* - * Jobs that are run during driver load may use an exec_queue, but are - * not associated with a user xe file, so avoid accumulating busyness - * for kernel specific work. - */ - if (!q->vm || !q->vm->xef) - return; - - xef = q->vm->xef; - - /* - * Only sample the first LRC. For parallel submission, all of them are - * scheduled together and we compensate that below by multiplying by - * width - this may introduce errors if that premise is not true and - * they don't exit 100% aligned. On the other hand, looping through - * the LRCs and reading them in different time could also introduce - * errors. - */ - lrc = q->lrc[0]; - new_ts = xe_lrc_update_timestamp(lrc, &old_ts); - xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; -} - -void xe_exec_queue_kill(struct xe_exec_queue *q) -{ - struct xe_exec_queue *eq = q, *next; - - list_for_each_entry_safe(eq, next, &eq->multi_gt_list, - multi_gt_link) { - q->ops->kill(eq); - xe_vm_remove_compute_exec_queue(q->vm, eq); - } - - q->ops->kill(q); - xe_vm_remove_compute_exec_queue(q->vm, q); -} - -int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_exec_queue_destroy *args = data; - struct xe_exec_queue *q; - - if (XE_IOCTL_DBG(xe, args->pad) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - mutex_lock(&xef->exec_queue.lock); - q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id); - mutex_unlock(&xef->exec_queue.lock); - if (XE_IOCTL_DBG(xe, !q)) - return -ENOENT; - - xe_exec_queue_kill(q); - - trace_xe_exec_queue_close(q); - xe_exec_queue_put(q); - - return 0; -} - -static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, - struct xe_vm *vm) -{ - if (q->flags & EXEC_QUEUE_FLAG_VM) - lockdep_assert_held(&vm->lock); - else - xe_vm_assert_held(vm); -} - -/** - * xe_exec_queue_last_fence_put() - Drop ref to last fence - * @q: The exec queue - * @vm: The VM the engine does a bind or exec for - */ -void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm) -{ - xe_exec_queue_last_fence_lockdep_assert(q, vm); - - if (q->last_fence) { - dma_fence_put(q->last_fence); - q->last_fence = NULL; - } -} - -/** - * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked - * @q: The exec queue - * - * Only safe to be called from xe_exec_queue_destroy(). - */ -void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q) -{ - if (q->last_fence) { - dma_fence_put(q->last_fence); - q->last_fence = NULL; - } -} - -/** - * xe_exec_queue_last_fence_get() - Get last fence - * @q: The exec queue - * @vm: The VM the engine does a bind or exec for - * - * Get last fence, takes a ref - * - * Returns: last fence if not signaled, dma fence stub if signaled - */ -struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, - struct xe_vm *vm) -{ - struct dma_fence *fence; - - xe_exec_queue_last_fence_lockdep_assert(q, vm); - - if (q->last_fence && - test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) - xe_exec_queue_last_fence_put(q, vm); - - fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); - dma_fence_get(fence); - return fence; -} - -/** - * xe_exec_queue_last_fence_set() - Set last fence - * @q: The exec queue - * @vm: The VM the engine does a bind or exec for - * @fence: The fence - * - * Set the last fence for the engine. Increases reference count for fence, when - * closing engine xe_exec_queue_last_fence_put should be called. - */ -void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm, - struct dma_fence *fence) -{ - xe_exec_queue_last_fence_lockdep_assert(q, vm); - - xe_exec_queue_last_fence_put(q, vm); - q->last_fence = dma_fence_get(fence); -} diff --git a/rr-cache/e7c9aafc2297a37f89715cfeed48ccbfb82f76bb/preimage.11 b/rr-cache/e7c9aafc2297a37f89715cfeed48ccbfb82f76bb/preimage.11 deleted file mode 100644 index 110e70f7ee7b..000000000000 --- a/rr-cache/e7c9aafc2297a37f89715cfeed48ccbfb82f76bb/preimage.11 +++ /dev/null @@ -1,2244 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_guc_submit.h" - -#include <linux/bitfield.h> -#include <linux/bitmap.h> -#include <linux/circ_buf.h> -#include <linux/delay.h> -#include <linux/dma-fence-array.h> -#include <linux/math64.h> - -#include <drm/drm_managed.h> - -#include "abi/guc_actions_abi.h" -#include "abi/guc_klvs_abi.h" -#include "regs/xe_lrc_layout.h" -#include "xe_assert.h" -#include "xe_devcoredump.h" -#include "xe_device.h" -#include "xe_exec_queue.h" -#include "xe_force_wake.h" -#include "xe_gpu_scheduler.h" -#include "xe_gt.h" -#include "xe_gt_clock.h" -#include "xe_gt_printk.h" -#include "xe_guc.h" -#include "xe_guc_ct.h" -#include "xe_guc_exec_queue_types.h" -#include "xe_guc_id_mgr.h" -#include "xe_guc_submit_types.h" -#include "xe_hw_engine.h" -#include "xe_hw_fence.h" -#include "xe_lrc.h" -#include "xe_macros.h" -#include "xe_map.h" -#include "xe_mocs.h" -#include "xe_pm.h" -#include "xe_ring_ops_types.h" -#include "xe_sched_job.h" -#include "xe_trace.h" -#include "xe_vm.h" - -static struct xe_guc * -exec_queue_to_guc(struct xe_exec_queue *q) -{ - return &q->gt->uc.guc; -} - -/* - * Helpers for engine state, using an atomic as some of the bits can transition - * as the same time (e.g. a suspend can be happning at the same time as schedule - * engine done being processed). - */ -#define EXEC_QUEUE_STATE_REGISTERED (1 << 0) -#define EXEC_QUEUE_STATE_ENABLED (1 << 1) -#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) -#define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) -#define EXEC_QUEUE_STATE_DESTROYED (1 << 4) -#define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) -#define EXEC_QUEUE_STATE_RESET (1 << 6) -#define EXEC_QUEUE_STATE_KILLED (1 << 7) -#define EXEC_QUEUE_STATE_WEDGED (1 << 8) -#define EXEC_QUEUE_STATE_BANNED (1 << 9) -#define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10) -#define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11) - -static bool exec_queue_registered(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED; -} - -static void set_exec_queue_registered(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); -} - -static void clear_exec_queue_registered(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); -} - -static bool exec_queue_enabled(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED; -} - -static void set_exec_queue_enabled(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state); -} - -static void clear_exec_queue_enabled(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state); -} - -static bool exec_queue_pending_enable(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE; -} - -static void set_exec_queue_pending_enable(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); -} - -static void clear_exec_queue_pending_enable(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); -} - -static bool exec_queue_pending_disable(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE; -} - -static void set_exec_queue_pending_disable(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); -} - -static void clear_exec_queue_pending_disable(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); -} - -static bool exec_queue_destroyed(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; -} - -static void set_exec_queue_destroyed(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); -} - -static bool exec_queue_banned(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; -} - -static void set_exec_queue_banned(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); -} - -static bool exec_queue_suspended(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; -} - -static void set_exec_queue_suspended(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); -} - -static void clear_exec_queue_suspended(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); -} - -static bool exec_queue_reset(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; -} - -static void set_exec_queue_reset(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); -} - -static bool exec_queue_killed(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; -} - -static void set_exec_queue_killed(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); -} - -static bool exec_queue_wedged(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED; -} - -static void set_exec_queue_wedged(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); -} - -static bool exec_queue_check_timeout(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT; -} - -static void set_exec_queue_check_timeout(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); -} - -static void clear_exec_queue_check_timeout(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); -} - -static bool exec_queue_extra_ref(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF; -} - -static void set_exec_queue_extra_ref(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); -} - -static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) -{ - return (atomic_read(&q->guc->state) & - (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED | - EXEC_QUEUE_STATE_BANNED)); -} - -#ifdef CONFIG_PROVE_LOCKING -static int alloc_submit_wq(struct xe_guc *guc) -{ - int i; - - for (i = 0; i < NUM_SUBMIT_WQ; ++i) { - guc->submission_state.submit_wq_pool[i] = - alloc_ordered_workqueue("submit_wq", 0); - if (!guc->submission_state.submit_wq_pool[i]) - goto err_free; - } - - return 0; - -err_free: - while (i) - destroy_workqueue(guc->submission_state.submit_wq_pool[--i]); - - return -ENOMEM; -} - -static void free_submit_wq(struct xe_guc *guc) -{ - int i; - - for (i = 0; i < NUM_SUBMIT_WQ; ++i) - destroy_workqueue(guc->submission_state.submit_wq_pool[i]); -} - -static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) -{ - int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ; - - return guc->submission_state.submit_wq_pool[idx]; -} -#else -static int alloc_submit_wq(struct xe_guc *guc) -{ - return 0; -} - -static void free_submit_wq(struct xe_guc *guc) -{ - -} - -static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) -{ - return NULL; -} -#endif - -static void guc_submit_fini(struct drm_device *drm, void *arg) -{ - struct xe_guc *guc = arg; - - xa_destroy(&guc->submission_state.exec_queue_lookup); - free_submit_wq(guc); -} - -static void guc_submit_wedged_fini(void *arg) -{ - struct xe_guc *guc = arg; - struct xe_exec_queue *q; - unsigned long index; - - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (exec_queue_wedged(q)) - xe_exec_queue_put(q); -} - -static const struct xe_exec_queue_ops guc_exec_queue_ops; - -static void primelockdep(struct xe_guc *guc) -{ - if (!IS_ENABLED(CONFIG_LOCKDEP)) - return; - - fs_reclaim_acquire(GFP_KERNEL); - - mutex_lock(&guc->submission_state.lock); - mutex_unlock(&guc->submission_state.lock); - - fs_reclaim_release(GFP_KERNEL); -} - -/** - * xe_guc_submit_init() - Initialize GuC submission. - * @guc: the &xe_guc to initialize - * @num_ids: number of GuC context IDs to use - * - * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all - * GuC context IDs supported by the GuC firmware should be used for submission. - * - * Only VF drivers will have to provide explicit number of GuC context IDs - * that they can use for submission. - * - * Return: 0 on success or a negative error code on failure. - */ -int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_gt *gt = guc_to_gt(guc); - int err; - - err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock); - if (err) - return err; - - err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids); - if (err) - return err; - - err = alloc_submit_wq(guc); - if (err) - return err; - - gt->exec_queue_ops = &guc_exec_queue_ops; - - xa_init(&guc->submission_state.exec_queue_lookup); - - primelockdep(guc); - - return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); -} - -static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) -{ - int i; - - lockdep_assert_held(&guc->submission_state.lock); - - for (i = 0; i < xa_count; ++i) - xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i); - - xe_guc_id_mgr_release_locked(&guc->submission_state.idm, - q->guc->id, q->width); -} - -static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) -{ - int ret; - void *ptr; - int i; - - /* - * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, - * worse case user gets -ENOMEM on engine create and has to try again. - * - * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent - * failure. - */ - lockdep_assert_held(&guc->submission_state.lock); - - ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm, - q->width); - if (ret < 0) - return ret; - - q->guc->id = ret; - - for (i = 0; i < q->width; ++i) { - ptr = xa_store(&guc->submission_state.exec_queue_lookup, - q->guc->id + i, q, GFP_NOWAIT); - if (IS_ERR(ptr)) { - ret = PTR_ERR(ptr); - goto err_release; - } - } - - return 0; - -err_release: - __release_guc_id(guc, q, i); - - return ret; -} - -static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) -{ - mutex_lock(&guc->submission_state.lock); - __release_guc_id(guc, q, q->width); - mutex_unlock(&guc->submission_state.lock); -} - -struct exec_queue_policy { - u32 count; - struct guc_update_exec_queue_policy h2g; -}; - -static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) -{ - size_t bytes = sizeof(policy->h2g.header) + - (sizeof(policy->h2g.klv[0]) * policy->count); - - return bytes / sizeof(u32); -} - -static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, - u16 guc_id) -{ - policy->h2g.header.action = - XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; - policy->h2g.header.guc_id = guc_id; - policy->count = 0; -} - -#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ -static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ - u32 data) \ -{ \ - XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ -\ - policy->h2g.klv[policy->count].kl = \ - FIELD_PREP(GUC_KLV_0_KEY, \ - GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ - FIELD_PREP(GUC_KLV_0_LEN, 1); \ - policy->h2g.klv[policy->count].value = data; \ - policy->count++; \ -} - -MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) -MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) -MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) -#undef MAKE_EXEC_QUEUE_POLICY_ADD - -static const int xe_exec_queue_prio_to_guc[] = { - [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, - [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, - [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, - [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, -}; - -static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) -{ - struct exec_queue_policy policy; - struct xe_device *xe = guc_to_xe(guc); - enum xe_exec_queue_priority prio = q->sched_props.priority; - u32 timeslice_us = q->sched_props.timeslice_us; - u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; - - xe_assert(xe, exec_queue_registered(q)); - - __guc_exec_queue_policy_start_klv(&policy, q->guc->id); - __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); - __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); - __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); - - xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, - __guc_exec_queue_policy_action_size(&policy), 0, 0); -} - -static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q) -{ - struct exec_queue_policy policy; - - __guc_exec_queue_policy_start_klv(&policy, q->guc->id); - __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); - - xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, - __guc_exec_queue_policy_action_size(&policy), 0, 0); -} - -#define parallel_read(xe_, map_, field_) \ - xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ - field_) -#define parallel_write(xe_, map_, field_, val_) \ - xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ - field_, val_) - -static void __register_mlrc_exec_queue(struct xe_guc *guc, - struct xe_exec_queue *q, - struct guc_ctxt_registration_info *info) -{ -#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) - struct xe_device *xe = guc_to_xe(guc); - u32 action[MAX_MLRC_REG_SIZE]; - int len = 0; - int i; - - xe_assert(xe, xe_exec_queue_is_parallel(q)); - - action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; - action[len++] = info->flags; - action[len++] = info->context_idx; - action[len++] = info->engine_class; - action[len++] = info->engine_submit_mask; - action[len++] = info->wq_desc_lo; - action[len++] = info->wq_desc_hi; - action[len++] = info->wq_base_lo; - action[len++] = info->wq_base_hi; - action[len++] = info->wq_size; - action[len++] = q->width; - action[len++] = info->hwlrca_lo; - action[len++] = info->hwlrca_hi; - - for (i = 1; i < q->width; ++i) { - struct xe_lrc *lrc = q->lrc[i]; - - action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); - action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); - } - - xe_assert(xe, len <= MAX_MLRC_REG_SIZE); -#undef MAX_MLRC_REG_SIZE - - xe_guc_ct_send(&guc->ct, action, len, 0, 0); -} - -static void __register_exec_queue(struct xe_guc *guc, - struct guc_ctxt_registration_info *info) -{ - u32 action[] = { - XE_GUC_ACTION_REGISTER_CONTEXT, - info->flags, - info->context_idx, - info->engine_class, - info->engine_submit_mask, - info->wq_desc_lo, - info->wq_desc_hi, - info->wq_base_lo, - info->wq_base_hi, - info->wq_size, - info->hwlrca_lo, - info->hwlrca_hi, - }; - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); -} - -static void register_exec_queue(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct xe_lrc *lrc = q->lrc[0]; - struct guc_ctxt_registration_info info; - - xe_assert(xe, !exec_queue_registered(q)); - - memset(&info, 0, sizeof(info)); - info.context_idx = q->guc->id; - info.engine_class = xe_engine_class_to_guc_class(q->class); - info.engine_submit_mask = q->logical_mask; - info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); - info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); - info.flags = CONTEXT_REGISTRATION_FLAG_KMD; - - if (xe_exec_queue_is_parallel(q)) { - u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); - struct iosys_map map = xe_lrc_parallel_map(lrc); - - info.wq_desc_lo = lower_32_bits(ggtt_addr + - offsetof(struct guc_submit_parallel_scratch, wq_desc)); - info.wq_desc_hi = upper_32_bits(ggtt_addr + - offsetof(struct guc_submit_parallel_scratch, wq_desc)); - info.wq_base_lo = lower_32_bits(ggtt_addr + - offsetof(struct guc_submit_parallel_scratch, wq[0])); - info.wq_base_hi = upper_32_bits(ggtt_addr + - offsetof(struct guc_submit_parallel_scratch, wq[0])); - info.wq_size = WQ_SIZE; - - q->guc->wqi_head = 0; - q->guc->wqi_tail = 0; - xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); - parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); - } - - /* - * We must keep a reference for LR engines if engine is registered with - * the GuC as jobs signal immediately and can't destroy an engine if the - * GuC has a reference to it. - */ - if (xe_exec_queue_is_lr(q)) - xe_exec_queue_get(q); - - set_exec_queue_registered(q); - trace_xe_exec_queue_register(q); - if (xe_exec_queue_is_parallel(q)) - __register_mlrc_exec_queue(guc, q, &info); - else - __register_exec_queue(guc, &info); - init_policies(guc, q); -} - -static u32 wq_space_until_wrap(struct xe_exec_queue *q) -{ - return (WQ_SIZE - q->guc->wqi_tail); -} - -static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); - unsigned int sleep_period_ms = 1; - -#define AVAILABLE_SPACE \ - CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) - if (wqi_size > AVAILABLE_SPACE) { -try_again: - q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); - if (wqi_size > AVAILABLE_SPACE) { - if (sleep_period_ms == 1024) { - xe_gt_reset_async(q->gt); - return -ENODEV; - } - - msleep(sleep_period_ms); - sleep_period_ms <<= 1; - goto try_again; - } - } -#undef AVAILABLE_SPACE - - return 0; -} - -static int wq_noop_append(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); - u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1; - - if (wq_wait_for_space(q, wq_space_until_wrap(q))) - return -ENODEV; - - xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); - - parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], - FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | - FIELD_PREP(WQ_LEN_MASK, len_dw)); - q->guc->wqi_tail = 0; - - return 0; -} - -static void wq_item_append(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); -#define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ - u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; - u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); - u32 len_dw = (wqi_size / sizeof(u32)) - 1; - int i = 0, j; - - if (wqi_size > wq_space_until_wrap(q)) { - if (wq_noop_append(q)) - return; - } - if (wq_wait_for_space(q, wqi_size)) - return; - - wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | - FIELD_PREP(WQ_LEN_MASK, len_dw); - wqi[i++] = xe_lrc_descriptor(q->lrc[0]); - wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | - FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64)); - wqi[i++] = 0; - for (j = 1; j < q->width; ++j) { - struct xe_lrc *lrc = q->lrc[j]; - - wqi[i++] = lrc->ring.tail / sizeof(u64); - } - - xe_assert(xe, i == wqi_size / sizeof(u32)); - - iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, - wq[q->guc->wqi_tail / sizeof(u32)])); - xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); - q->guc->wqi_tail += wqi_size; - xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); - - xe_device_wmb(xe); - - map = xe_lrc_parallel_map(q->lrc[0]); - parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); -} - -#define RESUME_PENDING ~0x0ull -static void submit_exec_queue(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct xe_lrc *lrc = q->lrc[0]; - u32 action[3]; - u32 g2h_len = 0; - u32 num_g2h = 0; - int len = 0; - bool extra_submit = false; - - xe_assert(xe, exec_queue_registered(q)); - - if (xe_exec_queue_is_parallel(q)) - wq_item_append(q); - else - xe_lrc_set_ring_tail(lrc, lrc->ring.tail); - - if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) - return; - - if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { - action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; - action[len++] = q->guc->id; - action[len++] = GUC_CONTEXT_ENABLE; - g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; - num_g2h = 1; - if (xe_exec_queue_is_parallel(q)) - extra_submit = true; - - q->guc->resume_time = RESUME_PENDING; - set_exec_queue_pending_enable(q); - set_exec_queue_enabled(q); - trace_xe_exec_queue_scheduling_enable(q); - } else { - action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; - action[len++] = q->guc->id; - trace_xe_exec_queue_submit(q); - } - - xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); - - if (extra_submit) { - len = 0; - action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; - action[len++] = q->guc->id; - trace_xe_exec_queue_submit(q); - - xe_guc_ct_send(&guc->ct, action, len, 0, 0); - } -} - -static struct dma_fence * -guc_exec_queue_run_job(struct drm_sched_job *drm_job) -{ - struct xe_sched_job *job = to_xe_sched_job(drm_job); - struct xe_exec_queue *q = job->q; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - bool lr = xe_exec_queue_is_lr(q); - - xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || - exec_queue_banned(q) || exec_queue_suspended(q)); - - trace_xe_sched_job_run(job); - - if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) { - if (!exec_queue_registered(q)) - register_exec_queue(q); - if (!lr) /* LR jobs are emitted in the exec IOCTL */ - q->ring_ops->emit_job(job); - submit_exec_queue(q); - } - - if (lr) { - xe_sched_job_set_error(job, -EOPNOTSUPP); - return NULL; - } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) { - return job->fence; - } else { - return dma_fence_get(job->fence); - } -} - -static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) -{ - struct xe_sched_job *job = to_xe_sched_job(drm_job); - - xe_exec_queue_update_run_ticks(job->q); - - trace_xe_sched_job_free(job); - xe_sched_job_put(job); -} - -static int guc_read_stopped(struct xe_guc *guc) -{ - return atomic_read(&guc->submission_state.stopped); -} - -#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ - u32 action[] = { \ - XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ - q->guc->id, \ - GUC_CONTEXT_##enable_disable, \ - } - -static void disable_scheduling_deregister(struct xe_guc *guc, - struct xe_exec_queue *q) -{ - MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); - struct xe_device *xe = guc_to_xe(guc); - int ret; - - set_min_preemption_timeout(guc, q); - smp_rmb(); - ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret) { - struct xe_gpu_scheduler *sched = &q->guc->sched; - - drm_warn(&xe->drm, "Pending enable failed to respond"); - xe_sched_submission_start(sched); - xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(sched); - return; - } - - clear_exec_queue_enabled(q); - set_exec_queue_pending_disable(q); - set_exec_queue_destroyed(q); - trace_xe_exec_queue_scheduling_disable(q); - - /* - * Reserve space for both G2H here as the 2nd G2H is sent from a G2H - * handler and we are not allowed to reserved G2H space in handlers. - */ - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + - G2H_LEN_DW_DEREGISTER_CONTEXT, 2); -} - -static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - /** to wakeup xe_wait_user_fence ioctl if exec queue is reset */ - wake_up_all(&xe->ufence_wq); - - if (xe_exec_queue_is_lr(q)) - queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr); - else - xe_sched_tdr_queue_imm(&q->guc->sched); -} - -/** - * xe_guc_submit_wedge() - Wedge GuC submission - * @guc: the GuC object - * - * Save exec queue's registered with GuC state by taking a ref to each queue. - * Register a DRMM handler to drop refs upon driver unload. - */ -void xe_guc_submit_wedge(struct xe_guc *guc) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - unsigned long index; - int err; - - xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); - - err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, - guc_submit_wedged_fini, guc); - if (err) { - drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); - return; - } - - mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (xe_exec_queue_get_unless_zero(q)) - set_exec_queue_wedged(q); - mutex_unlock(&guc->submission_state.lock); -} - -static bool guc_submit_hint_wedged(struct xe_guc *guc) -{ - struct xe_device *xe = guc_to_xe(guc); - - if (xe->wedged.mode != 2) - return false; - - if (xe_device_wedged(xe)) - return true; - - xe_device_declare_wedged(xe); - - return true; -} - -static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) -{ - struct xe_guc_exec_queue *ge = - container_of(w, struct xe_guc_exec_queue, lr_tdr); - struct xe_exec_queue *q = ge->q; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; - - xe_assert(xe, xe_exec_queue_is_lr(q)); - trace_xe_exec_queue_lr_cleanup(q); - - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); - - /* Kill the run_job / process_msg entry points */ - xe_sched_submission_stop(sched); - - /* - * Engine state now mostly stable, disable scheduling / deregister if - * needed. This cleanup routine might be called multiple times, where - * the actual async engine deregister drops the final engine ref. - * Calling disable_scheduling_deregister will mark the engine as - * destroyed and fire off the CT requests to disable scheduling / - * deregister, which we only want to do once. We also don't want to mark - * the engine as pending_disable again as this may race with the - * xe_guc_deregister_done_handler() which treats it as an unexpected - * state. - */ - if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { - struct xe_guc *guc = exec_queue_to_guc(q); - int ret; - - set_exec_queue_banned(q); - disable_scheduling_deregister(guc, q); - - /* - * Must wait for scheduling to be disabled before signalling - * any fences, if GT broken the GT reset code should signal us. - */ - ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_disable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret) { - drm_warn(&xe->drm, "Schedule disable failed to respond"); - xe_sched_submission_start(sched); - xe_gt_reset_async(q->gt); - return; - } - } - - xe_sched_submission_start(sched); -} - -#define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100) - -static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) -{ - struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); - u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); - u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); - u32 timeout_ms = q->sched_props.job_timeout_ms; - u32 diff; - u64 running_time_ms; - - /* - * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch - * possible overflows with a high timeout. - */ - xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); - - if (ctx_timestamp < ctx_job_timestamp) - diff = ctx_timestamp + U32_MAX - ctx_job_timestamp; - else - diff = ctx_timestamp - ctx_job_timestamp; - - /* - * Ensure timeout is within 5% to account for an GuC scheduling latency - */ - running_time_ms = - ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff)); - - xe_gt_dbg(gt, - "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, running_time_ms, timeout_ms, diff); - - return running_time_ms >= timeout_ms; -} - -static void enable_scheduling(struct xe_exec_queue *q) -{ - MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); - struct xe_guc *guc = exec_queue_to_guc(q); - int ret; - - xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); - - set_exec_queue_pending_enable(q); - set_exec_queue_enabled(q); - trace_xe_exec_queue_scheduling_enable(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); - - ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_enable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret || guc_read_stopped(guc)) { - xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); - set_exec_queue_banned(q); - xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(&q->guc->sched); - } -} - -static void disable_scheduling(struct xe_exec_queue *q, bool immediate) -{ - MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); - struct xe_guc *guc = exec_queue_to_guc(q); - - xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - - if (immediate) - set_min_preemption_timeout(guc, q); - clear_exec_queue_enabled(q); - set_exec_queue_pending_disable(q); - trace_xe_exec_queue_scheduling_disable(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); -} - -static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) -{ - u32 action[] = { - XE_GUC_ACTION_DEREGISTER_CONTEXT, - q->guc->id, - }; - - xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - - set_exec_queue_destroyed(q); - trace_xe_exec_queue_deregister(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_DEREGISTER_CONTEXT, 1); -} - -static enum drm_gpu_sched_stat -guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) -{ - struct xe_sched_job *job = to_xe_sched_job(drm_job); - struct xe_sched_job *tmp_job; - struct xe_exec_queue *q = job->q; - struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_guc *guc = exec_queue_to_guc(q); -<<<<<<< -======= - const char *process_name = "no process"; ->>>>>>> - int err = -ETIME; - int i = 0; - bool wedged, skip_timeout_check; - - /* - * TDR has fired before free job worker. Common if exec queue - * immediately closed after last fence signaled. - */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - guc_exec_queue_free_job(drm_job); - - return DRM_GPU_SCHED_STAT_NOMINAL; - } - - /* Kill the run_job entry point */ - xe_sched_submission_stop(sched); - - /* Must check all state after stopping scheduler */ - skip_timeout_check = exec_queue_reset(q) || - exec_queue_killed_or_banned_or_wedged(q) || - exec_queue_destroyed(q); - - /* Job hasn't started, can't be timed out */ - if (!skip_timeout_check && !xe_sched_job_started(job)) - goto rearm; - - /* - * XXX: Sampling timeout doesn't work in wedged mode as we have to - * modify scheduling state to read timestamp. We could read the - * timestamp from a register to accumulate current running time but this - * doesn't work for SRIOV. For now assuming timeouts in wedged mode are - * genuine timeouts. - */ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); - - /* Engine state now stable, disable scheduling to check timestamp */ - if (!wedged && exec_queue_registered(q)) { - int ret; - - if (exec_queue_reset(q)) - err = -EIO; - - if (!exec_queue_destroyed(q)) { - /* - * Wait for any pending G2H to flush out before - * modifying state - */ - ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_enable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret || guc_read_stopped(guc)) - goto trigger_reset; - - /* - * Flag communicates to G2H handler that schedule - * disable originated from a timeout check. The G2H then - * avoid triggering cleanup or deregistering the exec - * queue. - */ - set_exec_queue_check_timeout(q); - disable_scheduling(q, skip_timeout_check); - } - - /* - * Must wait for scheduling to be disabled before signalling - * any fences, if GT broken the GT reset code should signal us. - * - * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault - * error) messages which can cause the schedule disable to get - * lost. If this occurs, trigger a GT reset to recover. - */ - smp_rmb(); - ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_disable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret || guc_read_stopped(guc)) { -trigger_reset: - if (!ret) - xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond"); - set_exec_queue_extra_ref(q); - xe_exec_queue_get(q); /* GT reset owns this */ - set_exec_queue_banned(q); - xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(sched); - goto rearm; - } - } - - /* - * Check if job is actually timed out, if so restart job execution and TDR - */ - if (!wedged && !skip_timeout_check && !check_timeout(q, job) && - !exec_queue_reset(q) && exec_queue_registered(q)) { - clear_exec_queue_check_timeout(q); - goto sched_enable; - } - -<<<<<<< - if (q->vm && q->vm->xef) { - process_name = q->vm->xef->process_name; - pid = q->vm->xef->pid; - } - xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, q->flags, process_name, pid); - -======= - xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, q->flags); ->>>>>>> - trace_xe_sched_job_timedout(job); - - if (!exec_queue_killed(q)) - xe_devcoredump(job); - - /* - * Kernel jobs should never fail, nor should VM jobs if they do - * somethings has gone wrong and the GT needs a reset - */ - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, - "Kernel-submitted job timed out\n"); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), - "VM job timed out on non-killed execqueue\n"); - if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || - (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { - if (!xe_sched_invalidate_job(job, 2)) { - clear_exec_queue_check_timeout(q); - xe_gt_reset_async(q->gt); - goto rearm; - } - } - - /* Finish cleaning up exec queue via deregister */ - set_exec_queue_banned(q); - if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { - set_exec_queue_extra_ref(q); - xe_exec_queue_get(q); - __deregister_exec_queue(guc, q); - } - - /* Stop fence signaling */ - xe_hw_fence_irq_stop(q->fence_irq); - - /* - * Fence state now stable, stop / start scheduler which cleans up any - * fences that are complete - */ - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - - xe_guc_exec_queue_trigger_cleanup(q); - - /* Mark all outstanding jobs as bad, thus completing them */ - spin_lock(&sched->base.job_list_lock); - list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list) - xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED); - spin_unlock(&sched->base.job_list_lock); - - /* Start fence signaling */ - xe_hw_fence_irq_start(q->fence_irq); - - return DRM_GPU_SCHED_STAT_NOMINAL; - -sched_enable: - enable_scheduling(q); -rearm: - /* - * XXX: Ideally want to adjust timeout based on current exection time - * but there is not currently an easy way to do in DRM scheduler. With - * some thought, do this in a follow up. - */ - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_NOMINAL; -} - -static void __guc_exec_queue_fini_async(struct work_struct *w) -{ - struct xe_guc_exec_queue *ge = - container_of(w, struct xe_guc_exec_queue, fini_async); - struct xe_exec_queue *q = ge->q; - struct xe_guc *guc = exec_queue_to_guc(q); - - xe_pm_runtime_get(guc_to_xe(guc)); - trace_xe_exec_queue_destroy(q); - - if (xe_exec_queue_is_lr(q)) - cancel_work_sync(&ge->lr_tdr); - release_guc_id(guc, q); - xe_sched_entity_fini(&ge->entity); - xe_sched_fini(&ge->sched); - - kfree(ge); - xe_exec_queue_fini(q); - xe_pm_runtime_put(guc_to_xe(guc)); -} - -static void guc_exec_queue_fini_async(struct xe_exec_queue *q) -{ - INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); - - /* We must block on kernel engines so slabs are empty on driver unload */ - if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) - __guc_exec_queue_fini_async(&q->guc->fini_async); - else - queue_work(system_wq, &q->guc->fini_async); -} - -static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) -{ - /* - * Might be done from within the GPU scheduler, need to do async as we - * fini the scheduler when the engine is fini'd, the scheduler can't - * complete fini within itself (circular dependency). Async resolves - * this we and don't really care when everything is fini'd, just that it - * is. - */ - guc_exec_queue_fini_async(q); -} - -static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) -{ - struct xe_exec_queue *q = msg->private_data; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); - trace_xe_exec_queue_cleanup_entity(q); - - if (exec_queue_registered(q)) - disable_scheduling_deregister(guc, q); - else - __guc_exec_queue_fini(guc, q); -} - -static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) -{ - return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); -} - -static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) -{ - struct xe_exec_queue *q = msg->private_data; - struct xe_guc *guc = exec_queue_to_guc(q); - - if (guc_exec_queue_allowed_to_change_state(q)) - init_policies(guc, q); - kfree(msg); -} - -static void suspend_fence_signal(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) || - guc_read_stopped(guc)); - xe_assert(xe, q->guc->suspend_pending); - - q->guc->suspend_pending = false; - smp_wmb(); - wake_up(&q->guc->suspend_wait); -} - -static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) -{ - struct xe_exec_queue *q = msg->private_data; - struct xe_guc *guc = exec_queue_to_guc(q); - - if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && - exec_queue_enabled(q)) { - wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING || - guc_read_stopped(guc)); - - if (!guc_read_stopped(guc)) { - s64 since_resume_ms = - ktime_ms_delta(ktime_get(), - q->guc->resume_time); - s64 wait_ms = q->vm->preempt.min_run_period_ms - - since_resume_ms; - - if (wait_ms > 0 && q->guc->resume_time) - msleep(wait_ms); - - set_exec_queue_suspended(q); - disable_scheduling(q, false); - } - } else if (q->guc->suspend_pending) { - set_exec_queue_suspended(q); - suspend_fence_signal(q); - } -} - -static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) -{ - struct xe_exec_queue *q = msg->private_data; - - if (guc_exec_queue_allowed_to_change_state(q)) { - q->guc->resume_time = RESUME_PENDING; - clear_exec_queue_suspended(q); - enable_scheduling(q); - } else { - clear_exec_queue_suspended(q); - } -} - -#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ -#define SET_SCHED_PROPS 2 -#define SUSPEND 3 -#define RESUME 4 - -static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) -{ - trace_xe_sched_msg_recv(msg); - - switch (msg->opcode) { - case CLEANUP: - __guc_exec_queue_process_msg_cleanup(msg); - break; - case SET_SCHED_PROPS: - __guc_exec_queue_process_msg_set_sched_props(msg); - break; - case SUSPEND: - __guc_exec_queue_process_msg_suspend(msg); - break; - case RESUME: - __guc_exec_queue_process_msg_resume(msg); - break; - default: - XE_WARN_ON("Unknown message type"); - } - - xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data))); -} - -static const struct drm_sched_backend_ops drm_sched_ops = { - .run_job = guc_exec_queue_run_job, - .free_job = guc_exec_queue_free_job, - .timedout_job = guc_exec_queue_timedout_job, -}; - -static const struct xe_sched_backend_ops xe_sched_ops = { - .process_msg = guc_exec_queue_process_msg, -}; - -static int guc_exec_queue_init(struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct xe_guc_exec_queue *ge; - long timeout; - int err; - - xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); - - ge = kzalloc(sizeof(*ge), GFP_KERNEL); - if (!ge) - return -ENOMEM; - - q->guc = ge; - ge->q = q; - init_waitqueue_head(&ge->suspend_wait); - - timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : - msecs_to_jiffies(q->sched_props.job_timeout_ms); - err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, - get_submit_wq(guc), - q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64, - timeout, guc_to_gt(guc)->ordered_wq, NULL, - q->name, gt_to_xe(q->gt)->drm.dev); - if (err) - goto err_free; - - sched = &ge->sched; - err = xe_sched_entity_init(&ge->entity, sched); - if (err) - goto err_sched; - - if (xe_exec_queue_is_lr(q)) - INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); - - mutex_lock(&guc->submission_state.lock); - - err = alloc_guc_id(guc, q); - if (err) - goto err_entity; - - q->entity = &ge->entity; - - if (guc_read_stopped(guc)) - xe_sched_stop(sched); - - mutex_unlock(&guc->submission_state.lock); - - xe_exec_queue_assign_name(q, q->guc->id); - - trace_xe_exec_queue_create(q); - - return 0; - -err_entity: - mutex_unlock(&guc->submission_state.lock); - xe_sched_entity_fini(&ge->entity); -err_sched: - xe_sched_fini(&ge->sched); -err_free: - kfree(ge); - - return err; -} - -static void guc_exec_queue_kill(struct xe_exec_queue *q) -{ - trace_xe_exec_queue_kill(q); - set_exec_queue_killed(q); - xe_guc_exec_queue_trigger_cleanup(q); -} - -static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, - u32 opcode) -{ - xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); - - INIT_LIST_HEAD(&msg->link); - msg->opcode = opcode; - msg->private_data = q; - - trace_xe_sched_msg_add(msg); - xe_sched_add_msg(&q->guc->sched, msg); -} - -#define STATIC_MSG_CLEANUP 0 -#define STATIC_MSG_SUSPEND 1 -#define STATIC_MSG_RESUME 2 -static void guc_exec_queue_fini(struct xe_exec_queue *q) -{ - struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; - - if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) - guc_exec_queue_add_msg(q, msg, CLEANUP); - else - __guc_exec_queue_fini(exec_queue_to_guc(q), q); -} - -static int guc_exec_queue_set_priority(struct xe_exec_queue *q, - enum xe_exec_queue_priority priority) -{ - struct xe_sched_msg *msg; - - if (q->sched_props.priority == priority || - exec_queue_killed_or_banned_or_wedged(q)) - return 0; - - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - return -ENOMEM; - - q->sched_props.priority = priority; - guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); - - return 0; -} - -static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) -{ - struct xe_sched_msg *msg; - - if (q->sched_props.timeslice_us == timeslice_us || - exec_queue_killed_or_banned_or_wedged(q)) - return 0; - - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - return -ENOMEM; - - q->sched_props.timeslice_us = timeslice_us; - guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); - - return 0; -} - -static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, - u32 preempt_timeout_us) -{ - struct xe_sched_msg *msg; - - if (q->sched_props.preempt_timeout_us == preempt_timeout_us || - exec_queue_killed_or_banned_or_wedged(q)) - return 0; - - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - return -ENOMEM; - - q->sched_props.preempt_timeout_us = preempt_timeout_us; - guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); - - return 0; -} - -static int guc_exec_queue_suspend(struct xe_exec_queue *q) -{ - struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; - - if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending) - return -EINVAL; - - q->guc->suspend_pending = true; - guc_exec_queue_add_msg(q, msg, SUSPEND); - - return 0; -} - -static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - - wait_event(q->guc->suspend_wait, !q->guc->suspend_pending || - guc_read_stopped(guc)); -} - -static void guc_exec_queue_resume(struct xe_exec_queue *q) -{ - struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, !q->guc->suspend_pending); - - guc_exec_queue_add_msg(q, msg, RESUME); -} - -static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) -{ - return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); -} - -/* - * All of these functions are an abstraction layer which other parts of XE can - * use to trap into the GuC backend. All of these functions, aside from init, - * really shouldn't do much other than trap into the DRM scheduler which - * synchronizes these operations. - */ -static const struct xe_exec_queue_ops guc_exec_queue_ops = { - .init = guc_exec_queue_init, - .kill = guc_exec_queue_kill, - .fini = guc_exec_queue_fini, - .set_priority = guc_exec_queue_set_priority, - .set_timeslice = guc_exec_queue_set_timeslice, - .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, - .suspend = guc_exec_queue_suspend, - .suspend_wait = guc_exec_queue_suspend_wait, - .resume = guc_exec_queue_resume, - .reset_status = guc_exec_queue_reset_status, -}; - -static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched = &q->guc->sched; - - /* Stop scheduling + flush any DRM scheduler operations */ - xe_sched_submission_stop(sched); - - /* Clean up lost G2H + reset engine state */ - if (exec_queue_registered(q)) { - if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) - xe_exec_queue_put(q); - else if (exec_queue_destroyed(q)) - __guc_exec_queue_fini(guc, q); - } - if (q->guc->suspend_pending) { - set_exec_queue_suspended(q); - suspend_fence_signal(q); - } - atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED | - EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED | - EXEC_QUEUE_STATE_SUSPENDED, - &q->guc->state); - q->guc->resume_time = 0; - trace_xe_exec_queue_stop(q); - - /* - * Ban any engine (aside from kernel and engines used for VM ops) with a - * started but not complete job or if a job has gone through a GT reset - * more than twice. - */ - if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { - struct xe_sched_job *job = xe_sched_first_pending_job(sched); - bool ban = false; - - if (job) { - if ((xe_sched_job_started(job) && - !xe_sched_job_completed(job)) || - xe_sched_invalidate_job(job, 2)) { - trace_xe_sched_job_ban(job); - ban = true; - } - } else if (xe_exec_queue_is_lr(q) && - (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) { - ban = true; - } - - if (ban) { - set_exec_queue_banned(q); - xe_guc_exec_queue_trigger_cleanup(q); - } - } -} - -int xe_guc_submit_reset_prepare(struct xe_guc *guc) -{ - int ret; - - /* - * Using an atomic here rather than submission_state.lock as this - * function can be called while holding the CT lock (engine reset - * failure). submission_state.lock needs the CT lock to resubmit jobs. - * Atomic is not ideal, but it works to prevent against concurrent reset - * and releasing any TDRs waiting on guc->submission_state.stopped. - */ - ret = atomic_fetch_or(1, &guc->submission_state.stopped); - smp_wmb(); - wake_up_all(&guc->ct.wq); - - return ret; -} - -void xe_guc_submit_reset_wait(struct xe_guc *guc) -{ - wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || - !guc_read_stopped(guc)); -} - -void xe_guc_submit_stop(struct xe_guc *guc) -{ - struct xe_exec_queue *q; - unsigned long index; - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, guc_read_stopped(guc) == 1); - - mutex_lock(&guc->submission_state.lock); - - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - guc_exec_queue_stop(guc, q); - - mutex_unlock(&guc->submission_state.lock); - - /* - * No one can enter the backend at this point, aside from new engine - * creation which is protected by guc->submission_state.lock. - */ - -} - -static void guc_exec_queue_start(struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched = &q->guc->sched; - - if (!exec_queue_killed_or_banned_or_wedged(q)) { - int i; - - trace_xe_exec_queue_resubmit(q); - for (i = 0; i < q->width; ++i) - xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail); - xe_sched_resubmit_jobs(sched); - } - - xe_sched_submission_start(sched); -} - -int xe_guc_submit_start(struct xe_guc *guc) -{ - struct xe_exec_queue *q; - unsigned long index; - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, guc_read_stopped(guc) == 1); - - mutex_lock(&guc->submission_state.lock); - atomic_dec(&guc->submission_state.stopped); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - guc_exec_queue_start(q); - mutex_unlock(&guc->submission_state.lock); - - wake_up_all(&guc->ct.wq); - - return 0; -} - -static struct xe_exec_queue * -g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - - if (unlikely(guc_id >= GUC_ID_MAX)) { - drm_err(&xe->drm, "Invalid guc_id %u", guc_id); - return NULL; - } - - q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); - if (unlikely(!q)) { - drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); - return NULL; - } - - xe_assert(xe, guc_id >= q->guc->id); - xe_assert(xe, guc_id < (q->guc->id + q->width)); - - return q; -} - -static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) -{ - u32 action[] = { - XE_GUC_ACTION_DEREGISTER_CONTEXT, - q->guc->id, - }; - - xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q)); - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); - - trace_xe_exec_queue_deregister(q); - - xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); -} - -static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, - u32 runnable_state) -{ - trace_xe_exec_queue_scheduling_done(q); - - if (runnable_state == 1) { - xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); - - q->guc->resume_time = ktime_get(); - clear_exec_queue_pending_enable(q); - smp_wmb(); - wake_up_all(&guc->ct.wq); - } else { - bool check_timeout = exec_queue_check_timeout(q); - - xe_gt_assert(guc_to_gt(guc), runnable_state == 0); - xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); - - clear_exec_queue_pending_disable(q); - if (q->guc->suspend_pending) { - suspend_fence_signal(q); - } else { - if (exec_queue_banned(q) || check_timeout) { - smp_wmb(); - wake_up_all(&guc->ct.wq); - } - if (!check_timeout) - deregister_exec_queue(guc, q); - } - } -} - -int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - u32 runnable_state = msg[1]; - - if (unlikely(len < 2)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - if (unlikely(!exec_queue_pending_enable(q) && - !exec_queue_pending_disable(q))) { - xe_gt_err(guc_to_gt(guc), - "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u", - atomic_read(&q->guc->state), q->guc->id, - runnable_state); - return -EPROTO; - } - - handle_sched_done(guc, q, runnable_state); - - return 0; -} - -static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) -{ - trace_xe_exec_queue_deregister_done(q); - - clear_exec_queue_registered(q); - - if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) - xe_exec_queue_put(q); - else - __guc_exec_queue_fini(guc, q); -} - -int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || - exec_queue_pending_enable(q) || exec_queue_enabled(q)) { - xe_gt_err(guc_to_gt(guc), - "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d", - atomic_read(&q->guc->state), q->guc->id); - return -EPROTO; - } - - handle_deregister_done(guc, q); - - return 0; -} - -int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); - - /* FIXME: Do error capture, most likely async */ - - trace_xe_exec_queue_reset(q); - - /* - * A banned engine is a NOP at this point (came from - * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel - * jobs by setting timeout of the job to the minimum value kicking - * guc_exec_queue_timedout_job. - */ - set_exec_queue_reset(q); - if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) - xe_guc_exec_queue_trigger_cleanup(q); - - return 0; -} - -int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, - u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); - - trace_xe_exec_queue_memory_cat_error(q); - - /* Treat the same as engine reset */ - set_exec_queue_reset(q); - if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) - xe_guc_exec_queue_trigger_cleanup(q); - - return 0; -} - -int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_device *xe = guc_to_xe(guc); - u8 guc_class, instance; - u32 reason; - - if (unlikely(len != 3)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - guc_class = msg[0]; - instance = msg[1]; - reason = msg[2]; - - /* Unexpected failure of a hardware feature, log an actual error */ - drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", - guc_class, instance, reason); - - xe_gt_reset_async(guc_to_gt(guc)); - - return 0; -} - -static void -guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, - struct xe_guc_submit_exec_queue_snapshot *snapshot) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); - int i; - - snapshot->guc.wqi_head = q->guc->wqi_head; - snapshot->guc.wqi_tail = q->guc->wqi_tail; - snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); - snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); - snapshot->parallel.wq_desc.status = parallel_read(xe, map, - wq_desc.wq_status); - - if (snapshot->parallel.wq_desc.head != - snapshot->parallel.wq_desc.tail) { - for (i = snapshot->parallel.wq_desc.head; - i != snapshot->parallel.wq_desc.tail; - i = (i + sizeof(u32)) % WQ_SIZE) - snapshot->parallel.wq[i / sizeof(u32)] = - parallel_read(xe, map, wq[i / sizeof(u32)]); - } -} - -static void -guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, - struct drm_printer *p) -{ - int i; - - drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", - snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); - drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", - snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); - drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); - - if (snapshot->parallel.wq_desc.head != - snapshot->parallel.wq_desc.tail) { - for (i = snapshot->parallel.wq_desc.head; - i != snapshot->parallel.wq_desc.tail; - i = (i + sizeof(u32)) % WQ_SIZE) - drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), - snapshot->parallel.wq[i / sizeof(u32)]); - } -} - -/** - * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. - * @q: faulty exec queue - * - * This can be printed out in a later stage like during dev_coredump - * analysis. - * - * Returns: a GuC Submit Engine snapshot object that must be freed by the - * caller, using `xe_guc_exec_queue_snapshot_free`. - */ -struct xe_guc_submit_exec_queue_snapshot * -xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_guc_submit_exec_queue_snapshot *snapshot; - int i; - - snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); - - if (!snapshot) - return NULL; - - snapshot->guc.id = q->guc->id; - memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); - snapshot->class = q->class; - snapshot->logical_mask = q->logical_mask; - snapshot->width = q->width; - snapshot->refcount = kref_read(&q->refcount); - snapshot->sched_timeout = sched->base.timeout; - snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; - snapshot->sched_props.preempt_timeout_us = - q->sched_props.preempt_timeout_us; - - snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *), - GFP_ATOMIC); - - if (snapshot->lrc) { - for (i = 0; i < q->width; ++i) { - struct xe_lrc *lrc = q->lrc[i]; - - snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); - } - } - - snapshot->schedule_state = atomic_read(&q->guc->state); - snapshot->exec_queue_flags = q->flags; - - snapshot->parallel_execution = xe_exec_queue_is_parallel(q); - if (snapshot->parallel_execution) - guc_exec_queue_wq_snapshot_capture(q, snapshot); - - spin_lock(&sched->base.job_list_lock); - snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); - snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, - sizeof(struct pending_list_snapshot), - GFP_ATOMIC); - - if (snapshot->pending_list) { - struct xe_sched_job *job_iter; - - i = 0; - list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) { - snapshot->pending_list[i].seqno = - xe_sched_job_seqno(job_iter); - snapshot->pending_list[i].fence = - dma_fence_is_signaled(job_iter->fence) ? 1 : 0; - snapshot->pending_list[i].finished = - dma_fence_is_signaled(&job_iter->drm.s_fence->finished) - ? 1 : 0; - i++; - } - } - - spin_unlock(&sched->base.job_list_lock); - - return snapshot; -} - -/** - * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. - * @snapshot: Previously captured snapshot of job. - * - * This captures some data that requires taking some locks, so it cannot be done in signaling path. - */ -void -xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) -{ - int i; - - if (!snapshot || !snapshot->lrc) - return; - - for (i = 0; i < snapshot->width; ++i) - xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); -} - -/** - * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. - * @snapshot: GuC Submit Engine snapshot object. - * @p: drm_printer where it will be printed out. - * - * This function prints out a given GuC Submit Engine snapshot object. - */ -void -xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, - struct drm_printer *p) -{ - int i; - - if (!snapshot) - return; - - drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id); - drm_printf(p, "\tName: %s\n", snapshot->name); - drm_printf(p, "\tClass: %d\n", snapshot->class); - drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); - drm_printf(p, "\tWidth: %d\n", snapshot->width); - drm_printf(p, "\tRef: %d\n", snapshot->refcount); - drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); - drm_printf(p, "\tTimeslice: %u (us)\n", - snapshot->sched_props.timeslice_us); - drm_printf(p, "\tPreempt timeout: %u (us)\n", - snapshot->sched_props.preempt_timeout_us); - - for (i = 0; snapshot->lrc && i < snapshot->width; ++i) - xe_lrc_snapshot_print(snapshot->lrc[i], p); - - drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); - drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); - - if (snapshot->parallel_execution) - guc_exec_queue_wq_snapshot_print(snapshot, p); - - for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size; - i++) - drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", - snapshot->pending_list[i].seqno, - snapshot->pending_list[i].fence, - snapshot->pending_list[i].finished); -} - -/** - * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given - * snapshot. - * @snapshot: GuC Submit Engine snapshot object. - * - * This function free all the memory that needed to be allocated at capture - * time. - */ -void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) -{ - int i; - - if (!snapshot) - return; - - if (snapshot->lrc) { - for (i = 0; i < snapshot->width; i++) - xe_lrc_snapshot_free(snapshot->lrc[i]); - kfree(snapshot->lrc); - } - kfree(snapshot->pending_list); - kfree(snapshot); -} - -static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) -{ - struct xe_guc_submit_exec_queue_snapshot *snapshot; - - snapshot = xe_guc_exec_queue_snapshot_capture(q); - xe_guc_exec_queue_snapshot_print(snapshot, p); - xe_guc_exec_queue_snapshot_free(snapshot); -} - -/** - * xe_guc_submit_print - GuC Submit Print. - * @guc: GuC. - * @p: drm_printer where it will be printed out. - * - * This function capture and prints snapshots of **all** GuC Engines. - */ -void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) -{ - struct xe_exec_queue *q; - unsigned long index; - - if (!xe_device_uc_enabled(guc_to_xe(guc))) - return; - - mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - guc_exec_queue_print(q, p); - mutex_unlock(&guc->submission_state.lock); -} |