diff options
author | Robert Foss <rfoss@kernel.org> | 2024-11-11 17:34:12 +0100 |
---|---|---|
committer | Robert Foss <rfoss@kernel.org> | 2024-11-11 17:34:12 +0100 |
commit | 3a9af52564cdba37502ced6fa00ad36487fb4687 (patch) | |
tree | f8e96b7e0bed94906fb41f6a2652d9dda864f5d0 | |
parent | 4baecf35656c913155fa00788015f3565760ee1a (diff) |
2024y-11m-11d-16h-32m-16s UTC: drm-tip rerere cache update
git version 2.47.0
13 files changed, 0 insertions, 16279 deletions
diff --git a/rr-cache/11feaea49c34208543617598344df3d89c9b8a09/postimage b/rr-cache/11feaea49c34208543617598344df3d89c9b8a09/postimage deleted file mode 100644 index c4add8b38bbd..000000000000 --- a/rr-cache/11feaea49c34208543617598344df3d89c9b8a09/postimage +++ /dev/null @@ -1,354 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ -#include "xe_drm_client.h" - -#include <drm/drm_print.h> -#include <uapi/drm/xe_drm.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/types.h> - -#include "xe_assert.h" -#include "xe_bo.h" -#include "xe_bo_types.h" -#include "xe_device_types.h" -#include "xe_exec_queue.h" -#include "xe_force_wake.h" -#include "xe_gt.h" -#include "xe_hw_engine.h" -#include "xe_pm.h" -#include "xe_trace.h" - -/** - * DOC: DRM Client usage stats - * - * The drm/xe driver implements the DRM client usage stats specification as - * documented in :ref:`drm-client-usage-stats`. - * - * Example of the output showing the implemented key value pairs and entirety of - * the currently possible format options: - * - * :: - * - * pos: 0 - * flags: 0100002 - * mnt_id: 26 - * ino: 685 - * drm-driver: xe - * drm-client-id: 3 - * drm-pdev: 0000:03:00.0 - * drm-total-system: 0 - * drm-shared-system: 0 - * drm-active-system: 0 - * drm-resident-system: 0 - * drm-purgeable-system: 0 - * drm-total-gtt: 192 KiB - * drm-shared-gtt: 0 - * drm-active-gtt: 0 - * drm-resident-gtt: 192 KiB - * drm-total-vram0: 23992 KiB - * drm-shared-vram0: 16 MiB - * drm-active-vram0: 0 - * drm-resident-vram0: 23992 KiB - * drm-total-stolen: 0 - * drm-shared-stolen: 0 - * drm-active-stolen: 0 - * drm-resident-stolen: 0 - * drm-cycles-rcs: 28257900 - * drm-total-cycles-rcs: 7655183225 - * drm-cycles-bcs: 0 - * drm-total-cycles-bcs: 7655183225 - * drm-cycles-vcs: 0 - * drm-total-cycles-vcs: 7655183225 - * drm-engine-capacity-vcs: 2 - * drm-cycles-vecs: 0 - * drm-total-cycles-vecs: 7655183225 - * drm-engine-capacity-vecs: 2 - * 
drm-cycles-ccs: 0 - * drm-total-cycles-ccs: 7655183225 - * drm-engine-capacity-ccs: 4 - * - * Possible `drm-cycles-` key names are: `rcs`, `ccs`, `bcs`, `vcs`, `vecs` and - * "other". - */ - -/** - * xe_drm_client_alloc() - Allocate drm client - * @void: No arg - * - * Allocate drm client struct to track client memory against - * same till client life. Call this API whenever new client - * has opened xe device. - * - * Return: pointer to client struct or NULL if can't allocate - */ -struct xe_drm_client *xe_drm_client_alloc(void) -{ - struct xe_drm_client *client; - - client = kzalloc(sizeof(*client), GFP_KERNEL); - if (!client) - return NULL; - - kref_init(&client->kref); - -#ifdef CONFIG_PROC_FS - spin_lock_init(&client->bos_lock); - INIT_LIST_HEAD(&client->bos_list); -#endif - return client; -} - -/** - * __xe_drm_client_free() - Free client struct - * @kref: The reference - * - * This frees client struct. Call this API when xe device is closed - * by drm client. - * - * Return: void - */ -void __xe_drm_client_free(struct kref *kref) -{ - struct xe_drm_client *client = - container_of(kref, typeof(*client), kref); - - kfree(client); -} - -#ifdef CONFIG_PROC_FS -/** - * xe_drm_client_add_bo() - Add BO for tracking client mem usage - * @client: The drm client ptr - * @bo: The xe BO ptr - * - * Add all BO created by individual drm client by calling this function. - * This helps in tracking client memory usage. - * - * Return: void - */ -void xe_drm_client_add_bo(struct xe_drm_client *client, - struct xe_bo *bo) -{ - XE_WARN_ON(bo->client); - XE_WARN_ON(!list_empty(&bo->client_link)); - - spin_lock(&client->bos_lock); - bo->client = xe_drm_client_get(client); - list_add_tail(&bo->client_link, &client->bos_list); - spin_unlock(&client->bos_lock); -} - -/** - * xe_drm_client_remove_bo() - Remove BO for tracking client mem usage - * @bo: The xe BO ptr - * - * Remove all BO removed by individual drm client by calling this function. 
- * This helps in tracking client memory usage. - * - * Return: void - */ -void xe_drm_client_remove_bo(struct xe_bo *bo) -{ - struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); - struct xe_drm_client *client = bo->client; - - xe_assert(xe, !kref_read(&bo->ttm.base.refcount)); - - spin_lock(&client->bos_lock); - list_del_init(&bo->client_link); - spin_unlock(&client->bos_lock); - - xe_drm_client_put(client); -} - -static void bo_meminfo(struct xe_bo *bo, - struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) -{ - u64 sz = bo->size; - u32 mem_type = bo->ttm.resource->mem_type; - - xe_bo_assert_held(bo); - - if (drm_gem_object_is_shared_for_memory_stats(&bo->ttm.base)) - stats[mem_type].shared += sz; - else - stats[mem_type].private += sz; - - if (xe_bo_has_pages(bo)) { - stats[mem_type].resident += sz; - - if (!dma_resv_test_signaled(bo->ttm.base.resv, - DMA_RESV_USAGE_BOOKKEEP)) - stats[mem_type].active += sz; - else if (mem_type == XE_PL_SYSTEM) - stats[mem_type].purgeable += sz; - } -} - -static void show_meminfo(struct drm_printer *p, struct drm_file *file) -{ - struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {}; - struct xe_file *xef = file->driver_priv; - struct ttm_device *bdev = &xef->xe->ttm; - struct ttm_resource_manager *man; - struct xe_drm_client *client; - struct drm_gem_object *obj; - struct xe_bo *bo; - LLIST_HEAD(deferred); - unsigned int id; - u32 mem_type; - - client = xef->client; - - /* Public objects. */ - spin_lock(&file->table_lock); - idr_for_each_entry(&file->object_idr, obj, id) { - struct xe_bo *bo = gem_to_xe_bo(obj); - - if (dma_resv_trylock(bo->ttm.base.resv)) { - bo_meminfo(bo, stats); - xe_bo_unlock(bo); - } else { - xe_bo_get(bo); - spin_unlock(&file->table_lock); - - xe_bo_lock(bo, false); - bo_meminfo(bo, stats); - xe_bo_unlock(bo); - - xe_bo_put(bo); - spin_lock(&file->table_lock); - } - } - spin_unlock(&file->table_lock); - - /* Internal objects. 
*/ - spin_lock(&client->bos_lock); - list_for_each_entry(bo, &client->bos_list, client_link) { - if (!kref_get_unless_zero(&bo->ttm.base.refcount)) - continue; - - if (dma_resv_trylock(bo->ttm.base.resv)) { - bo_meminfo(bo, stats); - xe_bo_unlock(bo); - } else { - spin_unlock(&client->bos_lock); - - xe_bo_lock(bo, false); - bo_meminfo(bo, stats); - xe_bo_unlock(bo); - - spin_lock(&client->bos_lock); - /* The bo ref will prevent this bo from being removed from the list */ - xe_assert(xef->xe, !list_empty(&bo->client_link)); - } - - xe_bo_put_deferred(bo, &deferred); - } - spin_unlock(&client->bos_lock); - - xe_bo_put_commit(&deferred); - - for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { - if (!xe_mem_type_to_name[mem_type]) - continue; - - man = ttm_manager_type(bdev, mem_type); - - if (man) { - drm_print_memory_stats(p, - &stats[mem_type], - DRM_GEM_OBJECT_RESIDENT | - (mem_type != XE_PL_SYSTEM ? 0 : - DRM_GEM_OBJECT_PURGEABLE), - xe_mem_type_to_name[mem_type]); - } - } -} - -static void show_run_ticks(struct drm_printer *p, struct drm_file *file) -{ - unsigned long class, i, gt_id, capacity[XE_ENGINE_CLASS_MAX] = { }; - struct xe_file *xef = file->driver_priv; - struct xe_device *xe = xef->xe; - struct xe_gt *gt; - struct xe_hw_engine *hwe; - struct xe_exec_queue *q; - u64 gpu_timestamp; - - xe_pm_runtime_get(xe); - - /* Accumulate all the exec queues from this client */ - mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) - xe_exec_queue_update_run_ticks(q); - mutex_unlock(&xef->exec_queue.lock); - - /* Get the total GPU cycles */ - for_each_gt(gt, xe, gt_id) { - enum xe_force_wake_domains fw; - - hwe = xe_gt_any_hw_engine(gt); - if (!hwe) - continue; - - fw = xe_hw_engine_to_fw_domain(hwe); - if (xe_force_wake_get(gt_to_fw(gt), fw)) { - hwe = NULL; - break; - } - - gpu_timestamp = xe_hw_engine_read_timestamp(hwe); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), fw)); - break; - } - - xe_pm_runtime_put(xe); - - if 
(unlikely(!hwe)) - return; - - for (class = 0; class < XE_ENGINE_CLASS_MAX; class++) { - const char *class_name; - - for_each_gt(gt, xe, gt_id) - capacity[class] += gt->user_engines.instances_per_class[class]; - - /* - * Engines may be fused off or not exposed to userspace. Don't - * return anything if this entire class is not available - */ - if (!capacity[class]) - continue; - - class_name = xe_hw_engine_class_to_str(class); - drm_printf(p, "drm-cycles-%s:\t%llu\n", - class_name, xef->run_ticks[class]); - drm_printf(p, "drm-total-cycles-%s:\t%llu\n", - class_name, gpu_timestamp); - - if (capacity[class] > 1) - drm_printf(p, "drm-engine-capacity-%s:\t%lu\n", - class_name, capacity[class]); - } -} - -/** - * xe_drm_client_fdinfo() - Callback for fdinfo interface - * @p: The drm_printer ptr - * @file: The drm_file ptr - * - * This is callabck for drm fdinfo interface. Register this callback - * in drm driver ops for show_fdinfo. - * - * Return: void - */ -void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file) -{ - show_meminfo(p, file); - show_run_ticks(p, file); -} -#endif diff --git a/rr-cache/11feaea49c34208543617598344df3d89c9b8a09/preimage b/rr-cache/11feaea49c34208543617598344df3d89c9b8a09/preimage deleted file mode 100644 index 15bf27074856..000000000000 --- a/rr-cache/11feaea49c34208543617598344df3d89c9b8a09/preimage +++ /dev/null @@ -1,362 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ -#include "xe_drm_client.h" - -#include <drm/drm_print.h> -#include <uapi/drm/xe_drm.h> -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/types.h> - -#include "xe_assert.h" -#include "xe_bo.h" -#include "xe_bo_types.h" -#include "xe_device_types.h" -#include "xe_exec_queue.h" -#include "xe_force_wake.h" -#include "xe_gt.h" -#include "xe_hw_engine.h" -#include "xe_pm.h" -#include "xe_trace.h" - -/** - * DOC: DRM Client usage stats - * - * The drm/xe driver implements the DRM client usage stats 
specification as - * documented in :ref:`drm-client-usage-stats`. - * - * Example of the output showing the implemented key value pairs and entirety of - * the currently possible format options: - * - * :: - * - * pos: 0 - * flags: 0100002 - * mnt_id: 26 - * ino: 685 - * drm-driver: xe - * drm-client-id: 3 - * drm-pdev: 0000:03:00.0 - * drm-total-system: 0 - * drm-shared-system: 0 - * drm-active-system: 0 - * drm-resident-system: 0 - * drm-purgeable-system: 0 - * drm-total-gtt: 192 KiB - * drm-shared-gtt: 0 - * drm-active-gtt: 0 - * drm-resident-gtt: 192 KiB - * drm-total-vram0: 23992 KiB - * drm-shared-vram0: 16 MiB - * drm-active-vram0: 0 - * drm-resident-vram0: 23992 KiB - * drm-total-stolen: 0 - * drm-shared-stolen: 0 - * drm-active-stolen: 0 - * drm-resident-stolen: 0 - * drm-cycles-rcs: 28257900 - * drm-total-cycles-rcs: 7655183225 - * drm-cycles-bcs: 0 - * drm-total-cycles-bcs: 7655183225 - * drm-cycles-vcs: 0 - * drm-total-cycles-vcs: 7655183225 - * drm-engine-capacity-vcs: 2 - * drm-cycles-vecs: 0 - * drm-total-cycles-vecs: 7655183225 - * drm-engine-capacity-vecs: 2 - * drm-cycles-ccs: 0 - * drm-total-cycles-ccs: 7655183225 - * drm-engine-capacity-ccs: 4 - * - * Possible `drm-cycles-` key names are: `rcs`, `ccs`, `bcs`, `vcs`, `vecs` and - * "other". - */ - -/** - * xe_drm_client_alloc() - Allocate drm client - * @void: No arg - * - * Allocate drm client struct to track client memory against - * same till client life. Call this API whenever new client - * has opened xe device. 
- * - * Return: pointer to client struct or NULL if can't allocate - */ -struct xe_drm_client *xe_drm_client_alloc(void) -{ - struct xe_drm_client *client; - - client = kzalloc(sizeof(*client), GFP_KERNEL); - if (!client) - return NULL; - - kref_init(&client->kref); - -#ifdef CONFIG_PROC_FS - spin_lock_init(&client->bos_lock); - INIT_LIST_HEAD(&client->bos_list); -#endif - return client; -} - -/** - * __xe_drm_client_free() - Free client struct - * @kref: The reference - * - * This frees client struct. Call this API when xe device is closed - * by drm client. - * - * Return: void - */ -void __xe_drm_client_free(struct kref *kref) -{ - struct xe_drm_client *client = - container_of(kref, typeof(*client), kref); - - kfree(client); -} - -#ifdef CONFIG_PROC_FS -/** - * xe_drm_client_add_bo() - Add BO for tracking client mem usage - * @client: The drm client ptr - * @bo: The xe BO ptr - * - * Add all BO created by individual drm client by calling this function. - * This helps in tracking client memory usage. - * - * Return: void - */ -void xe_drm_client_add_bo(struct xe_drm_client *client, - struct xe_bo *bo) -{ - XE_WARN_ON(bo->client); - XE_WARN_ON(!list_empty(&bo->client_link)); - - spin_lock(&client->bos_lock); - bo->client = xe_drm_client_get(client); - list_add_tail(&bo->client_link, &client->bos_list); - spin_unlock(&client->bos_lock); -} - -/** - * xe_drm_client_remove_bo() - Remove BO for tracking client mem usage - * @bo: The xe BO ptr - * - * Remove all BO removed by individual drm client by calling this function. - * This helps in tracking client memory usage. 
- * - * Return: void - */ -void xe_drm_client_remove_bo(struct xe_bo *bo) -{ - struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); - struct xe_drm_client *client = bo->client; - - xe_assert(xe, !kref_read(&bo->ttm.base.refcount)); - - spin_lock(&client->bos_lock); - list_del_init(&bo->client_link); - spin_unlock(&client->bos_lock); - - xe_drm_client_put(client); -} - -static void bo_meminfo(struct xe_bo *bo, - struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) -{ - u64 sz = bo->size; - u32 mem_type = bo->ttm.resource->mem_type; - - xe_bo_assert_held(bo); -<<<<<<< -======= - - if (bo->placement.placement) - mem_type = bo->placement.placement->mem_type; - else - mem_type = XE_PL_TT; ->>>>>>> - - if (drm_gem_object_is_shared_for_memory_stats(&bo->ttm.base)) - stats[mem_type].shared += sz; - else - stats[mem_type].private += sz; - - if (xe_bo_has_pages(bo)) { - stats[mem_type].resident += sz; - - if (!dma_resv_test_signaled(bo->ttm.base.resv, - DMA_RESV_USAGE_BOOKKEEP)) - stats[mem_type].active += sz; - else if (mem_type == XE_PL_SYSTEM) - stats[mem_type].purgeable += sz; - } -} - -static void show_meminfo(struct drm_printer *p, struct drm_file *file) -{ - struct drm_memory_stats stats[TTM_NUM_MEM_TYPES] = {}; - struct xe_file *xef = file->driver_priv; - struct ttm_device *bdev = &xef->xe->ttm; - struct ttm_resource_manager *man; - struct xe_drm_client *client; - struct drm_gem_object *obj; - struct xe_bo *bo; - LLIST_HEAD(deferred); - unsigned int id; - u32 mem_type; - - client = xef->client; - - /* Public objects. 
*/ - spin_lock(&file->table_lock); - idr_for_each_entry(&file->object_idr, obj, id) { - struct xe_bo *bo = gem_to_xe_bo(obj); - - if (dma_resv_trylock(bo->ttm.base.resv)) { - bo_meminfo(bo, stats); - xe_bo_unlock(bo); - } else { - xe_bo_get(bo); - spin_unlock(&file->table_lock); - - xe_bo_lock(bo, false); - bo_meminfo(bo, stats); - xe_bo_unlock(bo); - - xe_bo_put(bo); - spin_lock(&file->table_lock); - } - } - spin_unlock(&file->table_lock); - - /* Internal objects. */ - spin_lock(&client->bos_lock); - list_for_each_entry(bo, &client->bos_list, client_link) { - if (!kref_get_unless_zero(&bo->ttm.base.refcount)) - continue; - - if (dma_resv_trylock(bo->ttm.base.resv)) { - bo_meminfo(bo, stats); - xe_bo_unlock(bo); - } else { - spin_unlock(&client->bos_lock); - - xe_bo_lock(bo, false); - bo_meminfo(bo, stats); - xe_bo_unlock(bo); - - spin_lock(&client->bos_lock); - /* The bo ref will prevent this bo from being removed from the list */ - xe_assert(xef->xe, !list_empty(&bo->client_link)); - } - - xe_bo_put_deferred(bo, &deferred); - } - spin_unlock(&client->bos_lock); - - xe_bo_put_commit(&deferred); - - for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { - if (!xe_mem_type_to_name[mem_type]) - continue; - - man = ttm_manager_type(bdev, mem_type); - - if (man) { - drm_print_memory_stats(p, - &stats[mem_type], - DRM_GEM_OBJECT_RESIDENT | - (mem_type != XE_PL_SYSTEM ? 
0 : - DRM_GEM_OBJECT_PURGEABLE), - xe_mem_type_to_name[mem_type]); - } - } -} - -static void show_run_ticks(struct drm_printer *p, struct drm_file *file) -{ - unsigned long class, i, gt_id, capacity[XE_ENGINE_CLASS_MAX] = { }; - struct xe_file *xef = file->driver_priv; - struct xe_device *xe = xef->xe; - struct xe_gt *gt; - struct xe_hw_engine *hwe; - struct xe_exec_queue *q; - u64 gpu_timestamp; - - xe_pm_runtime_get(xe); - - /* Accumulate all the exec queues from this client */ - mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) - xe_exec_queue_update_run_ticks(q); - mutex_unlock(&xef->exec_queue.lock); - - /* Get the total GPU cycles */ - for_each_gt(gt, xe, gt_id) { - enum xe_force_wake_domains fw; - - hwe = xe_gt_any_hw_engine(gt); - if (!hwe) - continue; - - fw = xe_hw_engine_to_fw_domain(hwe); - if (xe_force_wake_get(gt_to_fw(gt), fw)) { - hwe = NULL; - break; - } - - gpu_timestamp = xe_hw_engine_read_timestamp(hwe); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), fw)); - break; - } - - xe_pm_runtime_put(xe); - - if (unlikely(!hwe)) - return; - - for (class = 0; class < XE_ENGINE_CLASS_MAX; class++) { - const char *class_name; - - for_each_gt(gt, xe, gt_id) - capacity[class] += gt->user_engines.instances_per_class[class]; - - /* - * Engines may be fused off or not exposed to userspace. Don't - * return anything if this entire class is not available - */ - if (!capacity[class]) - continue; - - class_name = xe_hw_engine_class_to_str(class); - drm_printf(p, "drm-cycles-%s:\t%llu\n", - class_name, xef->run_ticks[class]); - drm_printf(p, "drm-total-cycles-%s:\t%llu\n", - class_name, gpu_timestamp); - - if (capacity[class] > 1) - drm_printf(p, "drm-engine-capacity-%s:\t%lu\n", - class_name, capacity[class]); - } -} - -/** - * xe_drm_client_fdinfo() - Callback for fdinfo interface - * @p: The drm_printer ptr - * @file: The drm_file ptr - * - * This is callabck for drm fdinfo interface. 
Register this callback - * in drm driver ops for show_fdinfo. - * - * Return: void - */ -void xe_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file) -{ - show_meminfo(p, file); - show_run_ticks(p, file); -} -#endif diff --git a/rr-cache/1ab34439cb1e3b81c7513b62c0d161208b67130e/preimage b/rr-cache/1ab34439cb1e3b81c7513b62c0d161208b67130e/preimage deleted file mode 100644 index a822b974678a..000000000000 --- a/rr-cache/1ab34439cb1e3b81c7513b62c0d161208b67130e/preimage +++ /dev/null @@ -1,601 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_GT_REGS_H_ -#define _XE_GT_REGS_H_ - -#include "regs/xe_reg_defs.h" - -/* - * The GSI register range [0x0 - 0x40000) is replicated at a higher offset - * for the media GT. xe_mmio and xe_gt_mcr functions will automatically - * translate offsets by MEDIA_GT_GSI_OFFSET when operating on the media GT. - */ -#define MEDIA_GT_GSI_OFFSET 0x380000 -#define MEDIA_GT_GSI_LENGTH 0x40000 - -/* MTL workpoint reg to get core C state and actual freq of 3D, SAMedia */ -#define MTL_MIRROR_TARGET_WP1 XE_REG(0xc60) -#define MTL_CAGF_MASK REG_GENMASK(8, 0) -#define MTL_CC_MASK REG_GENMASK(12, 9) - -/* RPM unit config (Gen8+) */ -#define RPM_CONFIG0 XE_REG(0xd00) -#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK REG_GENMASK(5, 3) -#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0 -#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1 -#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ 2 -#define RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3 -#define RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) - -#define FORCEWAKE_ACK_MEDIA_VDBOX(n) XE_REG(0xd50 + (n) * 4) -#define FORCEWAKE_ACK_MEDIA_VEBOX(n) XE_REG(0xd70 + (n) * 4) -#define FORCEWAKE_ACK_RENDER XE_REG(0xd84) - -#define GMD_ID XE_REG(0xd8c) -#define GMD_ID_ARCH_MASK REG_GENMASK(31, 22) -#define GMD_ID_RELEASE_MASK REG_GENMASK(21, 14) -#define GMD_ID_REVID REG_GENMASK(5, 0) - -#define FORCEWAKE_ACK_GSC XE_REG(0xdf8) -#define 
FORCEWAKE_ACK_GT_MTL XE_REG(0xdfc) - -#define MCFG_MCR_SELECTOR XE_REG(0xfd0) -#define MTL_MCR_SELECTOR XE_REG(0xfd4) -#define SF_MCR_SELECTOR XE_REG(0xfd8) -#define MCR_SELECTOR XE_REG(0xfdc) -#define GAM_MCR_SELECTOR XE_REG(0xfe0) -#define MCR_MULTICAST REG_BIT(31) -#define MCR_SLICE_MASK REG_GENMASK(30, 27) -#define MCR_SLICE(slice) REG_FIELD_PREP(MCR_SLICE_MASK, slice) -#define MCR_SUBSLICE_MASK REG_GENMASK(26, 24) -#define MCR_SUBSLICE(subslice) REG_FIELD_PREP(MCR_SUBSLICE_MASK, subslice) -#define MTL_MCR_GROUPID REG_GENMASK(11, 8) -#define MTL_MCR_INSTANCEID REG_GENMASK(3, 0) - -#define PS_INVOCATION_COUNT XE_REG(0x2348) - -#define XELP_GLOBAL_MOCS(i) XE_REG(0x4000 + (i) * 4) -#define XEHP_GLOBAL_MOCS(i) XE_REG_MCR(0x4000 + (i) * 4) -#define LE_SSE_MASK REG_GENMASK(18, 17) -#define LE_SSE(value) REG_FIELD_PREP(LE_SSE_MASK, value) -#define LE_COS_MASK REG_GENMASK(16, 15) -#define LE_COS(value) REG_FIELD_PREP(LE_COS_MASK) -#define LE_SCF_MASK REG_BIT(14) -#define LE_SCF(value) REG_FIELD_PREP(LE_SCF_MASK, value) -#define LE_PFM_MASK REG_GENMASK(13, 11) -#define LE_PFM(value) REG_FIELD_PREP(LE_PFM_MASK, value) -#define LE_SCC_MASK REG_GENMASK(10, 8) -#define LE_SCC(value) REG_FIELD_PREP(LE_SCC_MASK, value) -#define LE_RSC_MASK REG_BIT(7) -#define LE_RSC(value) REG_FIELD_PREP(LE_RSC_MASK, value) -#define LE_AOM_MASK REG_BIT(6) -#define LE_AOM(value) REG_FIELD_PREP(LE_AOM_MASK, value) -#define LE_LRUM_MASK REG_GENMASK(5, 4) -#define LE_LRUM(value) REG_FIELD_PREP(LE_LRUM_MASK, value) -#define LE_TGT_CACHE_MASK REG_GENMASK(3, 2) -#define LE_TGT_CACHE(value) REG_FIELD_PREP(LE_TGT_CACHE_MASK, value) -#define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) -#define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) - -<<<<<<< -#define STATELESS_COMPRESSION_CTRL XE_REG(0x4148) -#define UNIFIED_COMPRESSION_FORMAT REG_GENMASK(3, 0) - -#define XE2_GAMREQSTRM_CTRL XE_REG_MCR(0x4194) -======= -#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) ->>>>>>> -#define CG_DIS_CNTLBUS 
REG_BIT(6) - -#define CCS_AUX_INV XE_REG(0x4208) - -#define VD0_AUX_INV XE_REG(0x4218) -#define VE0_AUX_INV XE_REG(0x4238) - -#define VE1_AUX_INV XE_REG(0x42b8) -#define AUX_INV REG_BIT(0) - -#define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4) -#define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910) -#define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8) - -#define WM_CHICKEN3 XE_REG_MCR(0x5588, XE_REG_OPTION_MASKED) -#define HIZ_PLANE_COMPRESSION_DIS REG_BIT(10) - -#define CHICKEN_RASTER_1 XE_REG_MCR(0x6204, XE_REG_OPTION_MASKED) -#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) - -#define CHICKEN_RASTER_2 XE_REG_MCR(0x6208, XE_REG_OPTION_MASKED) -#define TBIMR_FAST_CLIP REG_BIT(5) - -#define FF_MODE XE_REG_MCR(0x6210) -#define DIS_TE_AUTOSTRIP REG_BIT(31) -#define DIS_MESH_PARTIAL_AUTOSTRIP REG_BIT(16) -#define DIS_MESH_AUTOSTRIP REG_BIT(15) - -#define VFLSKPD XE_REG_MCR(0x62a8, XE_REG_OPTION_MASKED) -#define DIS_PARTIAL_AUTOSTRIP REG_BIT(9) -#define DIS_AUTOSTRIP REG_BIT(6) -#define DIS_OVER_FETCH_CACHE REG_BIT(1) -#define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) - -#define FF_MODE2 XE_REG(0x6604) -#define XEHP_FF_MODE2 XE_REG_MCR(0x6604) -#define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24) -#define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224) -#define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16) -#define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4) - -#define XEHPG_INSTDONE_GEOM_SVGUNIT XE_REG_MCR(0x666c) - -#define CACHE_MODE_1 XE_REG(0x7004, XE_REG_OPTION_MASKED) -#define MSAA_OPTIMIZATION_REDUC_DISABLE REG_BIT(11) - -#define COMMON_SLICE_CHICKEN1 XE_REG(0x7010, XE_REG_OPTION_MASKED) -#define DISABLE_BOTTOM_CLIP_RECTANGLE_TEST REG_BIT(14) - -#define HIZ_CHICKEN XE_REG(0x7018, XE_REG_OPTION_MASKED) -#define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14) -#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13) - -#define XEHP_PSS_MODE2 XE_REG_MCR(0x703c, XE_REG_OPTION_MASKED) -#define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) 
- -#define XEHP_PSS_CHICKEN XE_REG_MCR(0x7044, XE_REG_OPTION_MASKED) -#define FLSH_IGNORES_PSD REG_BIT(10) -#define FD_END_COLLECT REG_BIT(5) - -#define SC_INSTDONE XE_REG(0x7100) -#define SC_INSTDONE_EXTRA XE_REG(0x7104) -#define SC_INSTDONE_EXTRA2 XE_REG(0x7108) - -#define XEHPG_SC_INSTDONE XE_REG_MCR(0x7100) -#define XEHPG_SC_INSTDONE_EXTRA XE_REG_MCR(0x7104) -#define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108) - -#define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED) -#define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6) - -#define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED) -#define XEHP_COMMON_SLICE_CHICKEN3 XE_REG_MCR(0x7304, XE_REG_OPTION_MASKED) -#define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12) -#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12) -#define BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11) -#define DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9) - -#define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED) -#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) - -#define VF_PREEMPTION XE_REG(0x83a4, XE_REG_OPTION_MASKED) -#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) - -#define VF_SCRATCHPAD XE_REG(0x83a8, XE_REG_OPTION_MASKED) -#define XE2_VFG_TED_CREDIT_INTERFACE_DISABLE REG_BIT(13) - -#define VFG_PREEMPTION_CHICKEN XE_REG(0x83b4, XE_REG_OPTION_MASKED) -#define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4) - -#define SQCNT1 XE_REG_MCR(0x8718) -#define XELPMP_SQCNT1 XE_REG(0x8718) -#define SQCNT1_PMON_ENABLE REG_BIT(30) -#define SQCNT1_OABPC REG_BIT(29) -#define ENFORCE_RAR REG_BIT(23) - -#define XEHP_SQCM XE_REG_MCR(0x8724) -#define EN_32B_ACCESS REG_BIT(30) - -#define XE2_FLAT_CCS_BASE_RANGE_LOWER XE_REG_MCR(0x8800) -#define XE2_FLAT_CCS_ENABLE REG_BIT(0) -#define XE2_FLAT_CCS_BASE_LOWER_ADDR_MASK REG_GENMASK(31, 6) - -#define XE2_FLAT_CCS_BASE_RANGE_UPPER XE_REG_MCR(0x8804) -#define XE2_FLAT_CCS_BASE_UPPER_ADDR_MASK REG_GENMASK(7, 0) - -#define GSCPSMI_BASE XE_REG(0x880c) - -#define CCCHKNREG1 
XE_REG_MCR(0x8828) -#define ENCOMPPERFFIX REG_BIT(18) - -/* Fuse readout registers for GT */ -#define XEHP_FUSE4 XE_REG(0x9114) -#define CFEG_WMTP_DISABLE REG_BIT(20) -#define CCS_EN_MASK REG_GENMASK(19, 16) -#define GT_L3_EXC_MASK REG_GENMASK(6, 4) - -#define MIRROR_FUSE3 XE_REG(0x9118) -#define XE2_NODE_ENABLE_MASK REG_GENMASK(31, 16) -#define L3BANK_PAIR_COUNT 4 -#define XEHPC_GT_L3_MODE_MASK REG_GENMASK(7, 4) -#define XE2_GT_L3_MODE_MASK REG_GENMASK(7, 4) -#define L3BANK_MASK REG_GENMASK(3, 0) -#define XELP_GT_L3_MODE_MASK REG_GENMASK(7, 0) -/* on Xe_HP the same fuses indicates mslices instead of L3 banks */ -#define MAX_MSLICES 4 -#define MEML3_EN_MASK REG_GENMASK(3, 0) - -#define MIRROR_FUSE1 XE_REG(0x911c) - -#define XELP_EU_ENABLE XE_REG(0x9134) /* "_DISABLE" on Xe_LP */ -#define XELP_EU_MASK REG_GENMASK(7, 0) -#define XELP_GT_SLICE_ENABLE XE_REG(0x9138) -#define XELP_GT_GEOMETRY_DSS_ENABLE XE_REG(0x913c) - -#define GT_VEBOX_VDBOX_DISABLE XE_REG(0x9140) -#define GT_VEBOX_DISABLE_MASK REG_GENMASK(19, 16) -#define GT_VDBOX_DISABLE_MASK REG_GENMASK(7, 0) - -#define XEHP_GT_COMPUTE_DSS_ENABLE XE_REG(0x9144) -#define XEHPC_GT_COMPUTE_DSS_ENABLE_EXT XE_REG(0x9148) -#define XE2_GT_COMPUTE_DSS_2 XE_REG(0x914c) -#define XE2_GT_GEOMETRY_DSS_1 XE_REG(0x9150) -#define XE2_GT_GEOMETRY_DSS_2 XE_REG(0x9154) - -#define GDRST XE_REG(0x941c) -#define GRDOM_GUC REG_BIT(3) -#define GRDOM_FULL REG_BIT(0) - -#define MISCCPCTL XE_REG(0x9424) -#define DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1) - -#define UNSLCGCTL9430 XE_REG(0x9430) -#define MSQDUNIT_CLKGATE_DIS REG_BIT(3) - -#define UNSLICE_UNIT_LEVEL_CLKGATE XE_REG(0x9434) -#define VFUNIT_CLKGATE_DIS REG_BIT(20) -#define TSGUNIT_CLKGATE_DIS REG_BIT(17) /* XEHPSDV */ -#define CG3DDISCFEG_CLKGATE_DIS REG_BIT(17) /* DG2 */ -#define GAMEDIA_CLKGATE_DIS REG_BIT(11) -#define HSUNIT_CLKGATE_DIS REG_BIT(8) -#define VSUNIT_CLKGATE_DIS REG_BIT(3) - -#define UNSLCGCTL9440 XE_REG(0x9440) -#define GAMTLBOACS_CLKGATE_DIS REG_BIT(28) -#define 
GAMTLBVDBOX5_CLKGATE_DIS REG_BIT(27) -#define GAMTLBVDBOX6_CLKGATE_DIS REG_BIT(26) -#define GAMTLBVDBOX3_CLKGATE_DIS REG_BIT(24) -#define GAMTLBVDBOX4_CLKGATE_DIS REG_BIT(23) -#define GAMTLBVDBOX7_CLKGATE_DIS REG_BIT(22) -#define GAMTLBVDBOX2_CLKGATE_DIS REG_BIT(21) -#define GAMTLBVDBOX0_CLKGATE_DIS REG_BIT(17) -#define GAMTLBKCR_CLKGATE_DIS REG_BIT(16) -#define GAMTLBGUC_CLKGATE_DIS REG_BIT(15) -#define GAMTLBBLT_CLKGATE_DIS REG_BIT(14) -#define GAMTLBVDBOX1_CLKGATE_DIS REG_BIT(6) - -#define UNSLCGCTL9444 XE_REG(0x9444) -#define GAMTLBGFXA0_CLKGATE_DIS REG_BIT(30) -#define GAMTLBGFXA1_CLKGATE_DIS REG_BIT(29) -#define GAMTLBCOMPA0_CLKGATE_DIS REG_BIT(28) -#define GAMTLBCOMPA1_CLKGATE_DIS REG_BIT(27) -#define GAMTLBCOMPB0_CLKGATE_DIS REG_BIT(26) -#define GAMTLBCOMPB1_CLKGATE_DIS REG_BIT(25) -#define GAMTLBCOMPC0_CLKGATE_DIS REG_BIT(24) -#define GAMTLBCOMPC1_CLKGATE_DIS REG_BIT(23) -#define GAMTLBCOMPD0_CLKGATE_DIS REG_BIT(22) -#define GAMTLBCOMPD1_CLKGATE_DIS REG_BIT(21) -#define GAMTLBMERT_CLKGATE_DIS REG_BIT(20) -#define GAMTLBVEBOX3_CLKGATE_DIS REG_BIT(19) -#define GAMTLBVEBOX2_CLKGATE_DIS REG_BIT(18) -#define GAMTLBVEBOX1_CLKGATE_DIS REG_BIT(17) -#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16) -#define LTCDD_CLKGATE_DIS REG_BIT(10) - -#define XEHP_SLICE_UNIT_LEVEL_CLKGATE XE_REG_MCR(0x94d4) -#define L3_CR2X_CLKGATE_DIS REG_BIT(17) -#define L3_CLKGATE_DIS REG_BIT(16) -#define NODEDSS_CLKGATE_DIS REG_BIT(12) -#define MSCUNIT_CLKGATE_DIS REG_BIT(10) -#define RCCUNIT_CLKGATE_DIS REG_BIT(7) -#define SARBUNIT_CLKGATE_DIS REG_BIT(5) -#define SBEUNIT_CLKGATE_DIS REG_BIT(4) - -#define UNSLICE_UNIT_LEVEL_CLKGATE2 XE_REG(0x94e4) -#define VSUNIT_CLKGATE2_DIS REG_BIT(19) - -#define SUBSLICE_UNIT_LEVEL_CLKGATE XE_REG_MCR(0x9524) -#define DSS_ROUTER_CLKGATE_DIS REG_BIT(28) -#define GWUNIT_CLKGATE_DIS REG_BIT(16) - -#define SUBSLICE_UNIT_LEVEL_CLKGATE2 XE_REG_MCR(0x9528) -#define CPSSUNIT_CLKGATE_DIS REG_BIT(9) - -#define SSMCGCTL9530 XE_REG_MCR(0x9530) -#define 
RTFUNIT_CLKGATE_DIS REG_BIT(18) - -#define DFR_RATIO_EN_AND_CHICKEN XE_REG_MCR(0x9550) -#define DFR_DISABLE REG_BIT(9) - -#define RPNSWREQ XE_REG(0xa008) -#define REQ_RATIO_MASK REG_GENMASK(31, 23) - -#define RP_CONTROL XE_REG(0xa024) -#define RPSWCTL_MASK REG_GENMASK(10, 9) -#define RPSWCTL_ENABLE REG_FIELD_PREP(RPSWCTL_MASK, 2) -#define RPSWCTL_DISABLE REG_FIELD_PREP(RPSWCTL_MASK, 0) -#define RC_CONTROL XE_REG(0xa090) -#define RC_CTL_HW_ENABLE REG_BIT(31) -#define RC_CTL_TO_MODE REG_BIT(28) -#define RC_CTL_RC6_ENABLE REG_BIT(18) -#define RC_STATE XE_REG(0xa094) -#define RC_IDLE_HYSTERSIS XE_REG(0xa0ac) -#define MEDIA_POWERGATE_IDLE_HYSTERESIS XE_REG(0xa0c4) -#define RENDER_POWERGATE_IDLE_HYSTERESIS XE_REG(0xa0c8) - -#define PMINTRMSK XE_REG(0xa168) -#define PMINTR_DISABLE_REDIRECT_TO_GUC REG_BIT(31) -#define ARAT_EXPIRED_INTRMSK REG_BIT(9) - -#define FORCEWAKE_GT XE_REG(0xa188) - -#define POWERGATE_ENABLE XE_REG(0xa210) -#define RENDER_POWERGATE_ENABLE REG_BIT(0) -#define MEDIA_POWERGATE_ENABLE REG_BIT(1) -#define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n)) -#define VDN_MFXVDENC_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n)) - -#define CTC_MODE XE_REG(0xa26c) -#define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1) -#define CTC_SOURCE_DIVIDE_LOGIC REG_BIT(0) - -#define FORCEWAKE_RENDER XE_REG(0xa278) -#define FORCEWAKE_MEDIA_VDBOX(n) XE_REG(0xa540 + (n) * 4) -#define FORCEWAKE_MEDIA_VEBOX(n) XE_REG(0xa560 + (n) * 4) -#define FORCEWAKE_GSC XE_REG(0xa618) - -#define XEHPC_LNCFMISCCFGREG0 XE_REG_MCR(0xb01c, XE_REG_OPTION_MASKED) -#define XEHPC_OVRLSCCC REG_BIT(0) - -/* L3 Cache Control */ -#define LNCFCMOCS_REG_COUNT 32 -#define XELP_LNCFCMOCS(i) XE_REG(0xb020 + (i) * 4) -#define XEHP_LNCFCMOCS(i) XE_REG_MCR(0xb020 + (i) * 4) -#define L3_UPPER_LKUP_MASK REG_BIT(23) -#define L3_UPPER_GLBGO_MASK REG_BIT(22) -#define L3_UPPER_IDX_CACHEABILITY_MASK REG_GENMASK(21, 20) -#define L3_UPPER_IDX_SCC_MASK REG_GENMASK(19, 17) -#define L3_UPPER_IDX_ESC_MASK REG_BIT(16) -#define 
L3_LKUP_MASK REG_BIT(7) -#define L3_LKUP(value) REG_FIELD_PREP(L3_LKUP_MASK, value) -#define L3_GLBGO_MASK REG_BIT(6) -#define L3_GLBGO(value) REG_FIELD_PREP(L3_GLBGO_MASK, value) -#define L3_CACHEABILITY_MASK REG_GENMASK(5, 4) -#define L3_CACHEABILITY(value) REG_FIELD_PREP(L3_CACHEABILITY_MASK, value) -#define L3_SCC_MASK REG_GENMASK(3, 1) -#define L3_SCC(value) REG_FIELD_PREP(L3_SCC_MASK, value) -#define L3_ESC_MASK REG_BIT(0) -#define L3_ESC(value) REG_FIELD_PREP(L3_ESC_MASK, value) - -#define XEHP_L3NODEARBCFG XE_REG_MCR(0xb0b4) -#define XEHP_LNESPARE REG_BIT(19) - -#define L3SQCREG3 XE_REG_MCR(0xb108) -#define COMPPWOVERFETCHEN REG_BIT(28) - -#define XEHP_L3SQCREG5 XE_REG_MCR(0xb158) -#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0) - -#define XEHP_L3SCQREG7 XE_REG_MCR(0xb188) -#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3) - -#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8) - -#define XE2_GLOBAL_INVAL XE_REG(0xb404) - -#define SCRATCH1LPFC XE_REG(0xb474) -#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0) - -#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) - -#define XE2_TDF_CTRL XE_REG(0xb418) -#define TRANSIENT_FLUSH_REQUEST REG_BIT(0) - -#define XEHP_MERT_MOD_CTRL XE_REG_MCR(0xcf28) -#define RENDER_MOD_CTRL XE_REG_MCR(0xcf2c) -#define COMP_MOD_CTRL XE_REG_MCR(0xcf30) -#define XEHP_VDBX_MOD_CTRL XE_REG_MCR(0xcf34) -#define XELPMP_VDBX_MOD_CTRL XE_REG(0xcf34) -#define XEHP_VEBX_MOD_CTRL XE_REG_MCR(0xcf38) -#define XELPMP_VEBX_MOD_CTRL XE_REG(0xcf38) -#define FORCE_MISS_FTLB REG_BIT(3) - -#define XEHP_GAMSTLB_CTRL XE_REG_MCR(0xcf4c) -#define CONTROL_BLOCK_CLKGATE_DIS REG_BIT(12) -#define EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11) -#define TAG_BLOCK_CLKGATE_DIS REG_BIT(7) - -#define XEHP_GAMCNTRL_CTRL XE_REG_MCR(0xcf54) -#define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12) -#define GLOBAL_INVALIDATION_MODE REG_BIT(2) - -#define HALF_SLICE_CHICKEN5 XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED) -#define DISABLE_SAMPLE_G_PERFORMANCE REG_BIT(0) - -#define SAMPLER_INSTDONE 
XE_REG_MCR(0xe160) -#define ROW_INSTDONE XE_REG_MCR(0xe164) - -#define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED) -#define ENABLE_SMALLPL REG_BIT(15) -#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) -#define SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) -#define INDIRECT_STATE_BASE_ADDR_OVERRIDE REG_BIT(0) - -#define HALF_SLICE_CHICKEN7 XE_REG_MCR(0xe194, XE_REG_OPTION_MASKED) -#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15) -#define CLEAR_OPTIMIZATION_DISABLE REG_BIT(6) - -#define CACHE_MODE_SS XE_REG_MCR(0xe420, XE_REG_OPTION_MASKED) -#define DISABLE_ECC REG_BIT(5) -#define ENABLE_PREFETCH_INTO_IC REG_BIT(3) - -#define ROW_CHICKEN4 XE_REG_MCR(0xe48c, XE_REG_OPTION_MASKED) -#define DISABLE_GRF_CLEAR REG_BIT(13) -#define XEHP_DIS_BBL_SYSPIPE REG_BIT(11) -#define DISABLE_TDL_PUSH REG_BIT(9) -#define DIS_PICK_2ND_EU REG_BIT(7) -#define DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4) -#define THREAD_EX_ARB_MODE REG_GENMASK(3, 2) -#define THREAD_EX_ARB_MODE_RR_AFTER_DEP REG_FIELD_PREP(THREAD_EX_ARB_MODE, 0x2) - -#define ROW_CHICKEN3 XE_REG_MCR(0xe49c, XE_REG_OPTION_MASKED) -#define XE2_EUPEND_CHK_FLUSH_DIS REG_BIT(14) -#define DIS_FIX_EOT1_FLUSH REG_BIT(9) - -#define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) -#define STK_ID_RESTRICT REG_BIT(12) -#define SLM_WMTP_RESTORE REG_BIT(11) - -#define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) -#define UGM_BACKUP_MODE REG_BIT(13) -#define MDQ_ARBITRATION_MODE REG_BIT(12) -#define STALL_DOP_GATING_DISABLE REG_BIT(5) -#define EARLY_EOT_DIS REG_BIT(1) - -#define ROW_CHICKEN2 XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED) -#define DISABLE_READ_SUPPRESSION REG_BIT(15) -#define DISABLE_EARLY_READ REG_BIT(14) -#define ENABLE_LARGE_GRF_MODE REG_BIT(12) -#define PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8) -#define DISABLE_TDL_SVHS_GATING REG_BIT(1) -#define DISABLE_DOP_GATING REG_BIT(0) - -#define RT_CTRL XE_REG_MCR(0xe530) -#define DIS_NULL_QUERY REG_BIT(10) - -#define 
EU_SYSTOLIC_LIC_THROTTLE_CTL_WITH_LOCK XE_REG_MCR(0xe534) -#define EU_SYSTOLIC_LIC_THROTTLE_CTL_LOCK_BIT REG_BIT(31) - -#define XEHP_HDC_CHICKEN0 XE_REG_MCR(0xe5f0, XE_REG_OPTION_MASKED) -#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) -#define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3) - -#define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8) -#define DISABLE_D8_D16_COASLESCE REG_BIT(30) -#define WR_REQ_CHAINING_DIS REG_BIT(26) -#define TGM_WRITE_EOM_FORCE REG_BIT(17) -#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) -#define SEQUENTIAL_ACCESS_UPGRADE_DISABLE REG_BIT(13) - -#define LSC_CHICKEN_BIT_0_UDW XE_REG_MCR(0xe7c8 + 4) -#define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32) -#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) -#define XE2_ALLOC_DPA_STARVE_FIX_DIS REG_BIT(47 - 32) -#define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32) -#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) -#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) -#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32) -#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32) - -#define SARB_CHICKEN1 XE_REG_MCR(0xe90c) -#define COMP_CKN_IN REG_GENMASK(30, 29) - -#define RCU_MODE XE_REG(0x14800, XE_REG_OPTION_MASKED) -#define RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1) -#define RCU_MODE_CCS_ENABLE REG_BIT(0) - -/* - * Total of 4 cslices, where each cslice is in the form: - * [0-3] CCS ID - * [4-6] RSVD - * [7] Disabled - */ -#define CCS_MODE XE_REG(0x14804) -#define CCS_MODE_CSLICE_0_3_MASK REG_GENMASK(11, 0) /* 3 bits per cslice */ -#define CCS_MODE_CSLICE_MASK 0x7 /* CCS0-3 + rsvd */ -#define CCS_MODE_CSLICE_WIDTH ilog2(CCS_MODE_CSLICE_MASK + 1) -#define CCS_MODE_CSLICE(cslice, ccs) \ - ((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH)) - -#define FORCEWAKE_ACK_GT XE_REG(0x130044) - -/* Applicable for all FORCEWAKE_DOMAIN and FORCEWAKE_ACK_DOMAIN regs */ -#define FORCEWAKE_KERNEL 0 -#define FORCEWAKE_MT(bit) BIT(bit) -#define FORCEWAKE_MT_MASK(bit) BIT((bit) + 16) 
- -#define MTL_MEDIA_PERF_LIMIT_REASONS XE_REG(0x138030) -#define MTL_MEDIA_MC6 XE_REG(0x138048) - -#define GT_CORE_STATUS XE_REG(0x138060) -#define RCN_MASK REG_GENMASK(2, 0) -#define GT_C0 0 -#define GT_C6 3 - -#define GT_GFX_RC6_LOCKED XE_REG(0x138104) -#define GT_GFX_RC6 XE_REG(0x138108) - -#define GT0_PERF_LIMIT_REASONS XE_REG(0x1381a8) -#define GT0_PERF_LIMIT_REASONS_MASK 0xde3 -#define PROCHOT_MASK REG_BIT(0) -#define THERMAL_LIMIT_MASK REG_BIT(1) -#define RATL_MASK REG_BIT(5) -#define VR_THERMALERT_MASK REG_BIT(6) -#define VR_TDC_MASK REG_BIT(7) -#define POWER_LIMIT_4_MASK REG_BIT(8) -#define POWER_LIMIT_1_MASK REG_BIT(10) -#define POWER_LIMIT_2_MASK REG_BIT(11) - -#define GT_PERF_STATUS XE_REG(0x1381b4) -#define VOLTAGE_MASK REG_GENMASK(10, 0) - -/* - * Note: Interrupt registers 1900xx are VF accessible only until version 12.50. - * On newer platforms, VFs are using memory-based interrupts instead. - * However, for simplicity we keep this XE_REG_OPTION_VF tag intact. - */ - -#define GT_INTR_DW(x) XE_REG(0x190018 + ((x) * 4), XE_REG_OPTION_VF) -#define INTR_GSC REG_BIT(31) -#define INTR_GUC REG_BIT(25) -#define INTR_MGUC REG_BIT(24) -#define INTR_BCS8 REG_BIT(23) -#define INTR_BCS(x) REG_BIT(15 - (x)) -#define INTR_CCS(x) REG_BIT(4 + (x)) -#define INTR_RCS0 REG_BIT(0) -#define INTR_VECS(x) REG_BIT(31 - (x)) -#define INTR_VCS(x) REG_BIT(x) - -#define RENDER_COPY_INTR_ENABLE XE_REG(0x190030, XE_REG_OPTION_VF) -#define VCS_VECS_INTR_ENABLE XE_REG(0x190034, XE_REG_OPTION_VF) -#define GUC_SG_INTR_ENABLE XE_REG(0x190038, XE_REG_OPTION_VF) -#define ENGINE1_MASK REG_GENMASK(31, 16) -#define ENGINE0_MASK REG_GENMASK(15, 0) -#define GPM_WGBOXPERF_INTR_ENABLE XE_REG(0x19003c, XE_REG_OPTION_VF) -#define GUNIT_GSC_INTR_ENABLE XE_REG(0x190044, XE_REG_OPTION_VF) -#define CCS_RSVD_INTR_ENABLE XE_REG(0x190048, XE_REG_OPTION_VF) - -#define INTR_IDENTITY_REG(x) XE_REG(0x190060 + ((x) * 4), XE_REG_OPTION_VF) -#define INTR_DATA_VALID REG_BIT(31) -#define INTR_ENGINE_INSTANCE(x) 
REG_FIELD_GET(GENMASK(25, 20), x) -#define INTR_ENGINE_CLASS(x) REG_FIELD_GET(GENMASK(18, 16), x) -#define INTR_ENGINE_INTR(x) REG_FIELD_GET(GENMASK(15, 0), x) -#define OTHER_GUC_INSTANCE 0 -#define OTHER_GSC_HECI2_INSTANCE 3 -#define OTHER_GSC_INSTANCE 6 - -#define IIR_REG_SELECTOR(x) XE_REG(0x190070 + ((x) * 4), XE_REG_OPTION_VF) -#define RCS0_RSVD_INTR_MASK XE_REG(0x190090, XE_REG_OPTION_VF) -#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF) -#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF) -#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF) -#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF) -#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4) -#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF) -#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF) -#define GUNIT_GSC_INTR_MASK XE_REG(0x1900f4, XE_REG_OPTION_VF) -#define CCS0_CCS1_INTR_MASK XE_REG(0x190100) -#define CCS2_CCS3_INTR_MASK XE_REG(0x190104) -#define XEHPC_BCS1_BCS2_INTR_MASK XE_REG(0x190110) -#define XEHPC_BCS3_BCS4_INTR_MASK XE_REG(0x190114) -#define XEHPC_BCS5_BCS6_INTR_MASK XE_REG(0x190118) -#define XEHPC_BCS7_BCS8_INTR_MASK XE_REG(0x19011c) -#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11) -#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8) -#define GSC_ER_COMPLETE REG_BIT(5) -#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4) -#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3) -#define GT_RENDER_USER_INTERRUPT REG_BIT(0) - -#endif diff --git a/rr-cache/41549dd6cc337627199acbe6749c5685c7e927d3/preimage.7 b/rr-cache/41549dd6cc337627199acbe6749c5685c7e927d3/preimage.7 deleted file mode 100644 index 232d29e9a561..000000000000 --- a/rr-cache/41549dd6cc337627199acbe6749c5685c7e927d3/preimage.7 +++ /dev/null @@ -1,2234 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include <linux/dma-fence-chain.h> - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" 
-#include "xe_drm_client.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. 
If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... 
- */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? - */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. 
- * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. 
- */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. 
Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. - */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. 
*/ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. - */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. 
- */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. 
Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. - */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. 
The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. - */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. 
- */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. 
- * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - 
struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. 
- */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (xe_vma_is_userptr(vma)) - lockdep_assert_held_read(&vm->userptr.notifier_lock); - else if (!xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct 
xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) { - kfree(entries[i].pt_entries); - continue; - } - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - kfree(entries[i].pt_entries); - } -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%u entries to update\n", num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? 
uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - uvma->userptr.divisor = divisor << 1; - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -/** - * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks - * @base: Base we derive from. - * @bind: Whether this is a bind or an unbind operation. A bind operation - * makes the pre-commit callback error with -EAGAIN if it detects a - * pending invalidation. - * @locked: Whether the pre-commit callback locked the userptr notifier lock - * and it needs unlocking. - */ -struct xe_pt_migrate_pt_update { - struct xe_migrate_pt_update base; - bool bind; - bool locked; -}; - -/* - * This function adds the needed dependencies to a page-table update job - * to make sure racing jobs for separate bind engines don't race writing - * to the same page-table range, wreaking havoc. Initially use a single - * fence for the entire VM. An optimization would use smaller granularity. - */ -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_range_fence_tree *rftree, - u64 start, u64 last) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - int err; - - rtfence = xe_range_fence_tree_first(rftree, start, last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? 
GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, start, last); - } - - return 0; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_range_fence_tree *rftree = - &xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, rftree, - pt_update->start, pt_update->last); -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_pt_migrate_pt_update *userptr_update = - container_of(pt_update, typeof(*userptr_update), base); - struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma); - unsigned long notifier_seq = uvma->userptr.notifier_seq; - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - int err = xe_pt_vm_dependencies(pt_update->job, - &vm->rftree[pt_update->tile_id], - pt_update->start, - pt_update->last); - - if (err) - return err; - - userptr_update->locked = false; - - /* - * Wait until nobody is running the invalidation notifier, and - * since we're exiting the loop holding the notifier lock, - * nobody can proceed invalidating either. - * - * Note that we don't update the vma->userptr.notifier_seq since - * we don't update the userptr pages. 
- */ - do { - down_read(&vm->userptr.notifier_lock); - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq)) - break; - - up_read(&vm->userptr.notifier_lock); - - if (userptr_update->bind) - return -EAGAIN; - - notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier); - } while (true); - - /* Inject errors to test_whether they are handled correctly */ - if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) { - up_read(&vm->userptr.notifier_lock); - return -EAGAIN; - } - - userptr_update->locked = true; - - return 0; -} - -static const struct xe_migrate_pt_update_ops bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int 
invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -static void xe_pt_calc_rfence_interval(struct xe_vma *vma, - struct xe_pt_migrate_pt_update *update, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - int i, level = 0; - - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - update->base.start = ALIGN_DOWN(xe_vma_start(vma), - 0x1ull << xe_pt_shift(level)); - update->base.last = ALIGN(xe_vma_end(vma), - 0x1ull << xe_pt_shift(level)) - 1; -} - -/** - * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma - * address range. - * @tile: The tile to bind for. - * @vma: The vma to bind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before binding the built tree to the live vm tree. - * @num_syncs: Number of @sync entries. - * @rebind: Whether we're rebinding this vma to the same address range without - * an unbind in-between. 
- * - * This function builds a page-table tree (see xe_pt_stage_bind() for more - * information on page-table building), and the xe_vm_pgtable_update entries - * abstracting the operations needed to attach it to the main vm tree. It - * then takes the relevant locks and updates the metadata side of the main - * vm tree and submits the operations for pipelined attachment of the - * gpu page-table to the vm main tree, (which can be done either by the - * cpu and the GPU). - * - * Return: A valid dma-fence representing the pipelined attachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool rebind) -{ - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update bind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops, - .vma = vma, - .tile_id = tile->id, - }, - .bind = true, - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; - struct xe_range_fence *rfence; - int err; - - bind_pt_update.locked = false; - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); - if (err) - goto err; - - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - goto err; - - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries, - num_entries); - - /* - * If rebind, we have to invalidate TLB on !LR vms to 
invalidate - * cached PTEs point to freed memory. on LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * on !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); - } else if (rebind && !xe_vm_in_lr_mode(vm)) { - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - fence = xe_migrate_update_pgtables(tile->migrate, - vm, xe_vma_bo(vma), q, - entries, num_entries, - syncs, num_syncs, - &bind_pt_update.base); - if (!IS_ERR(fence)) { - bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND; - LLIST_HEAD(deferred); - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - bind_pt_update.base.start, - bind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - - /* TLB invalidation must be done before signaling rebind */ - if (ifence) { - int err = invalidation_fence_init(tile->primary_gt, - ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - } - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || - last_munmap_rebind ? 
- DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_bind(vma, entries, num_entries, rebind, - bind_pt_update.locked ? &deferred : NULL); - - /* This vma is live (again?) now */ - vma->tile_present |= BIT(tile->id); - - if (bind_pt_update.locked) { - to_userptr_vma(vma)->userptr.initial_bind = true; - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - if (!rebind && last_munmap_rebind && - xe_vm_in_preempt_fence_mode(vm)) - xe_vm_queue_rebind_worker(vm); - } else { - kfree(rfence); - kfree(ifence); - if (bind_pt_update.locked) - up_read(&vm->userptr.notifier_lock); - xe_pt_abort_bind(vma, entries, num_entries); - } - - return fence; - -err: - return ERR_PTR(err); -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. 
- */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. 
*/ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. 
- */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vma *vma = pt_update->vma; - u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -<<<<<<< -static const struct xe_migrate_pt_update_ops unbind_ops = { - .populate = 
xe_migrate_clear_pgtable_callback, -======= -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, 
vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_prepare_bind(vma, pt_op->entries, - pt_op->num_entries, pt_op->rebind); - } else { - xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. 
- */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct 
xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? 
- DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? 
- DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence, - struct dma_fence *fence2) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence, - fence2); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence, fence2); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence, fence2); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, ->>>>>>> - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma - * address range. 
- * @tile: The tile to unbind for. - * @vma: The vma to unbind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before disconnecting the tree to be destroyed. - * @num_syncs: Number of @sync entries. - * - * This function builds a the xe_vm_pgtable_update entries abstracting the - * operations needed to detach the page-table tree to be destroyed from the - * man vm tree. - * It then takes the relevant locks and submits the operations for - * pipelined detachment of the gpu page-table from the vm main tree, - * (which can be done either by the cpu and the GPU), Finally it frees the - * detached page-table tree. - * - * Return: A valid dma-fence representing the pipelined detachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs) -{ -<<<<<<< - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence_chain *chain_fence = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0, i; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, -======= - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update unbind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? 
&userptr_unbind_ops : - &unbind_ops, - .vma = vma, - .tile_id = tile->id, - }, ->>>>>>> - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence = NULL; - struct invalidation_fence *ifence; - struct xe_range_fence *rfence; - int err; - - LLIST_HEAD(deferred); - - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - num_entries = xe_pt_stage_unbind(tile, vma, entries); - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, - num_entries); - -<<<<<<< - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - return ERR_PTR(err); - - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); -======= - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) { - err = -ENOMEM; - goto kill_vm_tile1; - } - if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - chain_fence = dma_fence_chain_alloc(); - if (!chain_fence) { - err = -ENOMEM; - goto free_ifence; - } - } - } ->>>>>>> - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - /* - * Even if we were already evicted and unbind to destroy, we need to - * clear again here. The eviction may have updated pagetables at a - * lower level, because it needs to be more conservative. - */ - fence = xe_migrate_update_pgtables(tile->migrate, - vm, NULL, q ? 
q : - vm->q[tile->id], - entries, num_entries, - syncs, num_syncs, - &unbind_pt_update.base); - if (!IS_ERR(fence)) { - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - unbind_pt_update.base.start, - unbind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - -<<<<<<< - /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, - DMA_RESV_USAGE_BOOKKEEP); - - /* This fence will be installed by caller when doing eviction */ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_unbind(vma, entries, num_entries, - unbind_pt_update.locked ? 
&deferred : NULL); - vma->tile_present &= ~BIT(tile->id); - } else { - kfree(rfence); - kfree(ifence); - } - - if (!vma->tile_present) - list_del_init(&vma->combined_links.rebind); - - if (unbind_pt_update.locked) { - xe_tile_assert(tile, xe_vma_is_userptr(vma)); -======= - xe_pt_commit(pt_op->vma, pt_op->entries, - pt_op->num_entries, &pt_update_ops->deferred); - pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */ - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - dma_fence_chain_init(chain_fence, &ifence->base.base, - &mfence->base.base, 0); - fence = &chain_fence->base; - } else { - fence = &ifence->base.base; - } - } - - if (!mfence) { - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); - } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, - pt_update_ops->wait_vm_bookkeep ? 
- DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); - } ->>>>>>> - - if (!vma->tile_present) { - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - - return fence; -<<<<<<< -======= - -free_rfence: - kfree(rfence); -free_ifence: - dma_fence_chain_free(chain_fence); - kfree(mfence); - kfree(ifence); -kill_vm_tile1: - if (err != -EAGAIN && tile->id) - xe_vm_kill(vops->vm, false); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = 0; i < pt_update_ops->current_op; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = pt_update_ops->num_ops - 1; i >= 0; --i) { - struct xe_vm_pgtable_update_op *pt_op = - 
&pt_update_ops->ops[i]; - - if (!pt_op->vma || i >= pt_update_ops->current_op) - continue; - - if (pt_op->bind) - xe_pt_abort_bind(pt_op->vma, pt_op->entries, - pt_op->num_entries, - pt_op->rebind); - else - xe_pt_abort_unbind(pt_op->vma, pt_op->entries, - pt_op->num_entries); - } - - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); ->>>>>>> -} diff --git a/rr-cache/4951c0e45d299a9570812ec9f1cc27e11aa21d6e/preimage.5 b/rr-cache/4951c0e45d299a9570812ec9f1cc27e11aa21d6e/preimage.5 deleted file mode 100644 index bb6eadffff5e..000000000000 --- a/rr-cache/4951c0e45d299a9570812ec9f1cc27e11aa21d6e/preimage.5 +++ /dev/null @@ -1,2248 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include <linux/dma-fence-array.h> - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 
__xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? 
as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... - */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? 
- */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. - * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. 
This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. - */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. 
*/ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. 
- */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. */ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. 
- */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. - */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = 
xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. 
- */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. 
- */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. 
- */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. - * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. 
Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * 
with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. 
- */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (xe_vma_is_userptr(vma)) - lockdep_assert_held_read(&vm->userptr.notifier_lock); - else if (!xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct 
xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) { - kfree(entries[i].pt_entries); - continue; - } - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - kfree(entries[i].pt_entries); - } -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%u entries to update\n", num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? 
uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - uvma->userptr.divisor = divisor << 1; - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -/** - * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks - * @base: Base we derive from. - * @bind: Whether this is a bind or an unbind operation. A bind operation - * makes the pre-commit callback error with -EAGAIN if it detects a - * pending invalidation. - * @locked: Whether the pre-commit callback locked the userptr notifier lock - * and it needs unlocking. - */ -struct xe_pt_migrate_pt_update { - struct xe_migrate_pt_update base; - bool bind; - bool locked; -}; - -/* - * This function adds the needed dependencies to a page-table update job - * to make sure racing jobs for separate bind engines don't race writing - * to the same page-table range, wreaking havoc. Initially use a single - * fence for the entire VM. An optimization would use smaller granularity. - */ -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_range_fence_tree *rftree, - u64 start, u64 last) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - int err; - - rtfence = xe_range_fence_tree_first(rftree, start, last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? 
GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, start, last); - } - - return 0; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_range_fence_tree *rftree = - &xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, rftree, - pt_update->start, pt_update->last); -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_pt_migrate_pt_update *userptr_update = - container_of(pt_update, typeof(*userptr_update), base); - struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma); - unsigned long notifier_seq = uvma->userptr.notifier_seq; - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - int err = xe_pt_vm_dependencies(pt_update->job, - &vm->rftree[pt_update->tile_id], - pt_update->start, - pt_update->last); - - if (err) - return err; - - userptr_update->locked = false; - - /* - * Wait until nobody is running the invalidation notifier, and - * since we're exiting the loop holding the notifier lock, - * nobody can proceed invalidating either. - * - * Note that we don't update the vma->userptr.notifier_seq since - * we don't update the userptr pages. 
- */ - do { - down_read(&vm->userptr.notifier_lock); - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq)) - break; - - up_read(&vm->userptr.notifier_lock); - - if (userptr_update->bind) - return -EAGAIN; - - notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier); - } while (true); - - /* Inject errors to test_whether they are handled correctly */ - if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) { - up_read(&vm->userptr.notifier_lock); - return -EAGAIN; - } - - userptr_update->locked = true; - - return 0; -} - -static const struct xe_migrate_pt_update_ops bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int 
invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -static void xe_pt_calc_rfence_interval(struct xe_vma *vma, - struct xe_pt_migrate_pt_update *update, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - int i, level = 0; - - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - update->base.start = ALIGN_DOWN(xe_vma_start(vma), - 0x1ull << xe_pt_shift(level)); - update->base.last = ALIGN(xe_vma_end(vma), - 0x1ull << xe_pt_shift(level)) - 1; -} - -/** - * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma - * address range. - * @tile: The tile to bind for. - * @vma: The vma to bind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before binding the built tree to the live vm tree. - * @num_syncs: Number of @sync entries. - * @rebind: Whether we're rebinding this vma to the same address range without - * an unbind in-between. 
- * - * This function builds a page-table tree (see xe_pt_stage_bind() for more - * information on page-table building), and the xe_vm_pgtable_update entries - * abstracting the operations needed to attach it to the main vm tree. It - * then takes the relevant locks and updates the metadata side of the main - * vm tree and submits the operations for pipelined attachment of the - * gpu page-table to the vm main tree, (which can be done either by the - * cpu and the GPU). - * - * Return: A valid dma-fence representing the pipelined attachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool rebind) -{ - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update bind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops, - .vma = vma, - .tile_id = tile->id, - }, - .bind = true, - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; - struct xe_range_fence *rfence; - int err; - - bind_pt_update.locked = false; - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); - if (err) - goto err; - - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - goto err; - - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries, - num_entries); - - /* - * If rebind, we have to invalidate TLB on !LR vms to 
invalidate - * cached PTEs point to freed memory. on LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * on !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); - } else if (rebind && !xe_vm_in_lr_mode(vm)) { - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - fence = xe_migrate_update_pgtables(tile->migrate, - vm, xe_vma_bo(vma), q, - entries, num_entries, - syncs, num_syncs, - &bind_pt_update.base); - if (!IS_ERR(fence)) { - bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND; - LLIST_HEAD(deferred); - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - bind_pt_update.base.start, - bind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - - /* TLB invalidation must be done before signaling rebind */ - if (ifence) { - int err = invalidation_fence_init(tile->primary_gt, - ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - } - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || - last_munmap_rebind ? 
- DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_bind(vma, entries, num_entries, rebind, - bind_pt_update.locked ? &deferred : NULL); - - /* This vma is live (again?) now */ - vma->tile_present |= BIT(tile->id); - - if (bind_pt_update.locked) { - to_userptr_vma(vma)->userptr.initial_bind = true; - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - if (!rebind && last_munmap_rebind && - xe_vm_in_preempt_fence_mode(vm)) - xe_vm_queue_rebind_worker(vm); - } else { - kfree(rfence); - kfree(ifence); - if (bind_pt_update.locked) - up_read(&vm->userptr.notifier_lock); - xe_pt_abort_bind(vma, entries, num_entries); - } - - return fence; - -err: - return ERR_PTR(err); -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. 
- */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. 
*/ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. 
- */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vma *vma = pt_update->vma; - u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -<<<<<<< -static const struct xe_migrate_pt_update_ops unbind_ops = { - .populate = 
xe_migrate_clear_pgtable_callback, -======= -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0; - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count << shift); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - 
pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_prepare_bind(vma, pt_op->entries, - pt_op->num_entries, pt_op->rebind); - } else { - xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. 
- */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct 
xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int shift = tile->media_gt ? 1 : 0; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count << shift); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? 
- DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? 
- DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence, - struct dma_fence *fence2) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence, - fence2); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence, fence2); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence, fence2); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, ->>>>>>> - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma - * address range. 
- * @tile: The tile to unbind for. - * @vma: The vma to unbind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before disconnecting the tree to be destroyed. - * @num_syncs: Number of @sync entries. - * - * This function builds a the xe_vm_pgtable_update entries abstracting the - * operations needed to detach the page-table tree to be destroyed from the - * man vm tree. - * It then takes the relevant locks and submits the operations for - * pipelined detachment of the gpu page-table from the vm main tree, - * (which can be done either by the cpu and the GPU), Finally it frees the - * detached page-table tree. - * - * Return: A valid dma-fence representing the pipelined detachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs) -{ -<<<<<<< - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0, i; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, -======= - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update unbind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? 
&userptr_unbind_ops : - &unbind_ops, - .vma = vma, - .tile_id = tile->id, - }, ->>>>>>> - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence = NULL; - struct invalidation_fence *ifence; - struct xe_range_fence *rfence; - int err; - - LLIST_HEAD(deferred); - - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - num_entries = xe_pt_stage_unbind(tile, vma, entries); - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, - num_entries); - -<<<<<<< - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - return ERR_PTR(err); - - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); -======= - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) { - err = -ENOMEM; - goto kill_vm_tile1; - } - if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); - if (!fences) { - err = -ENOMEM; - goto free_ifence; - } - cf = dma_fence_array_alloc(2); - if (!cf) { - err = -ENOMEM; - goto free_ifence; - } - } - } ->>>>>>> - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - /* - * Even if we were already evicted and unbind to destroy, we need to - * clear again here. The eviction may have updated pagetables at a - * lower level, because it needs to be more conservative. - */ - fence = xe_migrate_update_pgtables(tile->migrate, - vm, NULL, q ? 
q : - vm->q[tile->id], - entries, num_entries, - syncs, num_syncs, - &unbind_pt_update.base); - if (!IS_ERR(fence)) { - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - unbind_pt_update.base.start, - unbind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - -<<<<<<< - /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, - DMA_RESV_USAGE_BOOKKEEP); - - /* This fence will be installed by caller when doing eviction */ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_unbind(vma, entries, num_entries, - unbind_pt_update.locked ? 
&deferred : NULL); - vma->tile_present &= ~BIT(tile->id); - } else { - kfree(rfence); - kfree(ifence); - } - - if (!vma->tile_present) - list_del_init(&vma->combined_links.rebind); - - if (unbind_pt_update.locked) { - xe_tile_assert(tile, xe_vma_is_userptr(vma)); -======= - xe_pt_commit(pt_op->vma, pt_op->entries, - pt_op->num_entries, &pt_update_ops->deferred); - pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */ - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - fences[0] = &ifence->base.base; - fences[1] = &mfence->base.base; - dma_fence_array_init(cf, 2, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - fence = &cf->base; - } else { - fence = &ifence->base.base; - } - } - - if (!mfence) { - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); - } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, - pt_update_ops->wait_vm_bookkeep ? 
- DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); - } ->>>>>>> - - if (!vma->tile_present) { - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - - return fence; -<<<<<<< -======= - -free_rfence: - kfree(rfence); -free_ifence: - kfree(cf); - kfree(fences); - kfree(mfence); - kfree(ifence); -kill_vm_tile1: - if (err != -EAGAIN && tile->id) - xe_vm_kill(vops->vm, false); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = 0; i < pt_update_ops->current_op; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = pt_update_ops->num_ops - 1; i >= 0; --i) { - struct xe_vm_pgtable_update_op *pt_op = - &pt_update_ops->ops[i]; 
- - if (!pt_op->vma || i >= pt_update_ops->current_op) - continue; - - if (pt_op->bind) - xe_pt_abort_bind(pt_op->vma, pt_op->entries, - pt_op->num_entries, - pt_op->rebind); - else - xe_pt_abort_unbind(pt_op->vma, pt_op->entries, - pt_op->num_entries); - } - - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); ->>>>>>> -} diff --git a/rr-cache/61b4d511b17c3c471878e1610fa36de4c26207ec/postimage b/rr-cache/61b4d511b17c3c471878e1610fa36de4c26207ec/postimage deleted file mode 100644 index 249651079810..000000000000 --- a/rr-cache/61b4d511b17c3c471878e1610fa36de4c26207ec/postimage +++ /dev/null @@ -1,975 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_gt.h" - -#include <linux/minmax.h> - -#include <drm/drm_managed.h> - -#include <uapi/drm/xe_drm.h> - -#include <generated/xe_wa_oob.h> - -#include "instructions/xe_gfxpipe_commands.h" -#include "instructions/xe_mi_commands.h" -#include "regs/xe_gt_regs.h" -#include "xe_assert.h" -#include "xe_bb.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_exec_queue.h" -#include "xe_execlist.h" -#include "xe_force_wake.h" -#include "xe_ggtt.h" -#include "xe_gsc.h" -#include "xe_gt_ccs_mode.h" -#include "xe_gt_clock.h" -#include "xe_gt_freq.h" -#include "xe_gt_idle.h" -#include "xe_gt_mcr.h" -#include "xe_gt_pagefault.h" -#include "xe_gt_printk.h" -#include "xe_gt_sriov_pf.h" -#include "xe_gt_sysfs.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_gt_topology.h" -#include "xe_guc_exec_queue_types.h" -#include "xe_guc_pc.h" -#include "xe_hw_fence.h" -#include "xe_hw_engine_class_sysfs.h" -#include "xe_irq.h" -#include "xe_lmtt.h" -#include "xe_lrc.h" -#include "xe_map.h" -#include "xe_migrate.h" -#include "xe_mmio.h" -#include "xe_pat.h" -#include "xe_pm.h" -#include "xe_mocs.h" -#include "xe_reg_sr.h" -#include "xe_ring_ops.h" -#include "xe_sa.h" -#include "xe_sched_job.h" -#include "xe_sriov.h" -#include "xe_tuning.h" -#include "xe_uc.h" -#include 
"xe_uc_fw.h" -#include "xe_vm.h" -#include "xe_wa.h" -#include "xe_wopcm.h" - -static void gt_fini(struct drm_device *drm, void *arg) -{ - struct xe_gt *gt = arg; - - destroy_workqueue(gt->ordered_wq); -} - -struct xe_gt *xe_gt_alloc(struct xe_tile *tile) -{ - struct xe_gt *gt; - int err; - - gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL); - if (!gt) - return ERR_PTR(-ENOMEM); - - gt->tile = tile; - gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); - - err = drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); - if (err) - return ERR_PTR(err); - - return gt; -} - -void xe_gt_sanitize(struct xe_gt *gt) -{ - /* - * FIXME: if xe_uc_sanitize is called here, on TGL driver will not - * reload - */ - gt->uc.guc.submission_state.enabled = false; -} - -static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) -{ - u32 reg; - int err; - - if (!XE_WA(gt, 16023588340)) - return; - - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (WARN_ON(err)) - return; - - if (!xe_gt_is_media_type(gt)) { - xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); - reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); - reg |= CG_DIS_CNTLBUS; - xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); - } - - xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -} - -static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) -{ - u32 reg; - int err; - - if (!XE_WA(gt, 16023588340)) - return; - - if (xe_gt_is_media_type(gt)) - return; - - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (WARN_ON(err)) - return; - - reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); - reg &= ~CG_DIS_CNTLBUS; - xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); - - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -} - -/** - * xe_gt_remove() - Clean up the GT structures before driver removal - * @gt: the GT object - * - * This function should only act on objects/structures that must be cleaned - * before 
the driver removal callback is complete and therefore can't be - * deferred to a drmm action. - */ -void xe_gt_remove(struct xe_gt *gt) -{ - int i; - - xe_uc_remove(>->uc); - - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); - - xe_gt_disable_host_l2_vram(gt); -} - -static void gt_reset_worker(struct work_struct *w); - -static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) -{ - struct xe_sched_job *job; - struct xe_bb *bb; - struct dma_fence *fence; - long timeout; - - bb = xe_bb_new(gt, 4, false); - if (IS_ERR(bb)) - return PTR_ERR(bb); - - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); - return PTR_ERR(job); - } - - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); - - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); - xe_bb_free(bb, NULL); - if (timeout < 0) - return timeout; - else if (!timeout) - return -ETIME; - - return 0; -} - -/* - * Convert back from encoded value to type-safe, only to be used when reg.mcr - * is true - */ -static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg) -{ - return (const struct xe_reg_mcr){.__reg.raw = reg.raw }; -} - -static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) -{ - struct xe_reg_sr *sr = &q->hwe->reg_lrc; - struct xe_reg_sr_entry *entry; - unsigned long idx; - struct xe_sched_job *job; - struct xe_bb *bb; - struct dma_fence *fence; - long timeout; - int count = 0; - - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) - /* Big enough to emit all of the context's 3DSTATE */ - bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false); - else - /* Just pick a large BB size */ - bb = xe_bb_new(gt, SZ_4K, false); - - if (IS_ERR(bb)) - return PTR_ERR(bb); - - xa_for_each(&sr->xa, idx, entry) - ++count; - - if (count) { - xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); - - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); - 
- xa_for_each(&sr->xa, idx, entry) { - struct xe_reg reg = entry->reg; - struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg); - u32 val; - - /* - * Skip reading the register if it's not really needed - */ - if (reg.masked) - val = entry->clr_bits << 16; - else if (entry->clr_bits + 1) - val = (reg.mcr ? - xe_gt_mcr_unicast_read_any(gt, reg_mcr) : - xe_mmio_read32(gt, reg)) & (~entry->clr_bits); - else - val = 0; - - val |= entry->set_bits; - - bb->cs[bb->len++] = reg.addr; - bb->cs[bb->len++] = val; - xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val); - } - } - - xe_lrc_emit_hwe_state_instructions(q, bb); - - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); - return PTR_ERR(job); - } - - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); - - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); - xe_bb_free(bb, NULL); - if (timeout < 0) - return timeout; - else if (!timeout) - return -ETIME; - - return 0; -} - -int xe_gt_record_default_lrcs(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - int err = 0; - - for_each_hw_engine(hwe, gt, id) { - struct xe_exec_queue *q, *nop_q; - void *default_lrc; - - if (gt->default_lrc[hwe->class]) - continue; - - xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe); - xe_wa_process_lrc(hwe); - xe_hw_engine_setup_default_lrc_state(hwe); - xe_tuning_process_lrc(hwe); - - default_lrc = drmm_kzalloc(&xe->drm, - xe_gt_lrc_size(gt, hwe->class), - GFP_KERNEL); - if (!default_lrc) - return -ENOMEM; - - q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, - hwe, EXEC_QUEUE_FLAG_KERNEL, 0); - if (IS_ERR(q)) { - err = PTR_ERR(q); - xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n", - hwe->name, q); - return err; - } - - /* Prime golden LRC with known good state */ - err = emit_wa_job(gt, q); - if (err) { - xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n", 
- hwe->name, ERR_PTR(err), q->guc->id); - goto put_exec_queue; - } - - nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), - 1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0); - if (IS_ERR(nop_q)) { - err = PTR_ERR(nop_q); - xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n", - hwe->name, nop_q); - goto put_exec_queue; - } - - /* Switch to different LRC */ - err = emit_nop_job(gt, nop_q); - if (err) { - xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), nop_q->guc->id); - goto put_nop_q; - } - - /* Reload golden LRC to record the effect of any indirect W/A */ - err = emit_nop_job(gt, q); - if (err) { - xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), q->guc->id); - goto put_nop_q; - } - - xe_map_memcpy_from(xe, default_lrc, - &q->lrc[0]->bo->vmap, - xe_lrc_pphwsp_offset(q->lrc[0]), - xe_gt_lrc_size(gt, hwe->class)); - - gt->default_lrc[hwe->class] = default_lrc; -put_nop_q: - xe_exec_queue_put(nop_q); -put_exec_queue: - xe_exec_queue_put(q); - if (err) - break; - } - - return err; -} - -int xe_gt_init_early(struct xe_gt *gt) -{ - int err; - - if (IS_SRIOV_PF(gt_to_xe(gt))) { - err = xe_gt_sriov_pf_init_early(gt); - if (err) - return err; - } - - xe_reg_sr_init(>->reg_sr, "GT", gt_to_xe(gt)); - - err = xe_wa_init(gt); - if (err) - return err; - - xe_wa_process_gt(gt); - xe_wa_process_oob(gt); - xe_tuning_process_gt(gt); - - xe_force_wake_init_gt(gt, gt_to_fw(gt)); - spin_lock_init(>->global_invl_lock); - - return 0; -} - -static void dump_pat_on_error(struct xe_gt *gt) -{ - struct drm_printer p; - char prefix[32]; - - snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id); - p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, prefix); - - xe_pat_dump(gt, &p); -} - -static int gt_fw_domain_init(struct xe_gt *gt) -{ - int err, i; - - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto err_hw_fence_irq; - - if (!xe_gt_is_media_type(gt)) { - err = 
xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); - if (err) - goto err_force_wake; - if (IS_SRIOV_PF(gt_to_xe(gt))) - xe_lmtt_init(>_to_tile(gt)->sriov.pf.lmtt); - } - - /* Enable per hw engine IRQs */ - xe_irq_enable_hwe(gt); - - /* Rerun MCR init as we now have hw engine list */ - xe_gt_mcr_init(gt); - - err = xe_hw_engines_init_early(gt); - if (err) - goto err_force_wake; - - err = xe_hw_engine_class_sysfs_init(gt); - if (err) - goto err_force_wake; - - /* Initialize CCS mode sysfs after early initialization of HW engines */ - err = xe_gt_ccs_mode_sysfs_init(gt); - if (err) - goto err_force_wake; - - /* - * Stash hardware-reported version. Since this register does not exist - * on pre-MTL platforms, reading it there will (correctly) return 0. - */ - gt->info.gmdid = xe_mmio_read32(gt, GMD_ID); - - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - XE_WARN_ON(err); - - return 0; - -err_force_wake: - dump_pat_on_error(gt); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -err_hw_fence_irq: - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); - - return err; -} - -static int all_fw_domain_init(struct xe_gt *gt) -{ - int err, i; - - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_hw_fence_irq; - - xe_gt_mcr_set_implicit_defaults(gt); - xe_reg_sr_apply_mmio(>->reg_sr, gt); - - err = xe_gt_clock_init(gt); - if (err) - goto err_force_wake; - - xe_mocs_init(gt); - err = xe_execlist_init(gt); - if (err) - goto err_force_wake; - - err = xe_hw_engines_init(gt); - if (err) - goto err_force_wake; - - err = xe_uc_init_post_hwconfig(>->uc); - if (err) - goto err_force_wake; - - if (!xe_gt_is_media_type(gt)) { - /* - * USM has its only SA pool to non-block behind user operations - */ - if (gt_to_xe(gt)->info.has_usm) { - struct xe_device *xe = gt_to_xe(gt); - - gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), - IS_DGFX(xe) ? 
SZ_1M : SZ_512K, 16); - if (IS_ERR(gt->usm.bb_pool)) { - err = PTR_ERR(gt->usm.bb_pool); - goto err_force_wake; - } - } - } - - if (!xe_gt_is_media_type(gt)) { - struct xe_tile *tile = gt_to_tile(gt); - - tile->migrate = xe_migrate_init(tile); - if (IS_ERR(tile->migrate)) { - err = PTR_ERR(tile->migrate); - goto err_force_wake; - } - } - - err = xe_uc_init_hw(>->uc); - if (err) - goto err_force_wake; - - /* Configure default CCS mode of 1 engine with all resources */ - if (xe_gt_ccs_mode_enabled(gt)) { - gt->ccs_mode = 1; - xe_gt_apply_ccs_mode(gt); - } - - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) - xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); - - if (IS_SRIOV_PF(gt_to_xe(gt))) - xe_gt_sriov_pf_init_hw(gt); - - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(err); - - return 0; - -err_force_wake: - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); -err_hw_fence_irq: - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); - - return err; -} - -/* - * Initialize enough GT to be able to load GuC in order to obtain hwconfig and - * enable CTB communication. 
- */ -int xe_gt_init_hwconfig(struct xe_gt *gt) -{ - int err; - - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto out; - - xe_gt_mcr_init_early(gt); - xe_pat_init(gt); - - err = xe_uc_init(>->uc); - if (err) - goto out_fw; - - err = xe_uc_init_hwconfig(>->uc); - if (err) - goto out_fw; - - xe_gt_topology_init(gt); - xe_gt_mcr_init(gt); - xe_gt_enable_host_l2_vram(gt); - -out_fw: - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -out: - return err; -} - -int xe_gt_init(struct xe_gt *gt) -{ - int err; - int i; - - INIT_WORK(>->reset.worker, gt_reset_worker); - - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) { - gt->ring_ops[i] = xe_ring_ops_get(gt, i); - xe_hw_fence_irq_init(>->fence_irq[i]); - } - - err = xe_gt_tlb_invalidation_init(gt); - if (err) - return err; - - err = xe_gt_pagefault_init(gt); - if (err) - return err; - - xe_mocs_init_early(gt); - - err = xe_gt_sysfs_init(gt); - if (err) - return err; - - err = gt_fw_domain_init(gt); - if (err) - return err; - - err = xe_gt_idle_init(>->gtidle); - if (err) - return err; - - err = xe_gt_freq_init(gt); - if (err) - return err; - - xe_force_wake_init_engines(gt, gt_to_fw(gt)); - - err = all_fw_domain_init(gt); - if (err) - return err; - - xe_gt_record_user_engines(gt); - - return 0; -} - -void xe_gt_record_user_engines(struct xe_gt *gt) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - gt->user_engines.mask = 0; - memset(gt->user_engines.instances_per_class, 0, - sizeof(gt->user_engines.instances_per_class)); - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - gt->user_engines.mask |= BIT_ULL(id); - gt->user_engines.instances_per_class[hwe->class]++; - } - - xe_gt_assert(gt, (gt->user_engines.mask | gt->info.engine_mask) - == gt->info.engine_mask); -} - -static int do_gt_reset(struct xe_gt *gt) -{ - int err; - - xe_gsc_wa_14015076503(gt, true); - - xe_mmio_write32(gt, GDRST, GRDOM_FULL); - err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false); 
- if (err) - xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n", - ERR_PTR(err)); - - xe_gsc_wa_14015076503(gt, false); - - return err; -} - -static int vf_gt_restart(struct xe_gt *gt) -{ - int err; - - err = xe_uc_sanitize_reset(>->uc); - if (err) - return err; - - err = xe_uc_init_hw(>->uc); - if (err) - return err; - - err = xe_uc_start(>->uc); - if (err) - return err; - - return 0; -} - -static int do_gt_restart(struct xe_gt *gt) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - int err; - - if (IS_SRIOV_VF(gt_to_xe(gt))) - return vf_gt_restart(gt); - - xe_pat_init(gt); - - xe_gt_enable_host_l2_vram(gt); - - xe_gt_mcr_set_implicit_defaults(gt); - xe_reg_sr_apply_mmio(>->reg_sr, gt); - - err = xe_wopcm_init(>->uc.wopcm); - if (err) - return err; - - for_each_hw_engine(hwe, gt, id) - xe_hw_engine_enable_ring(hwe); - - err = xe_uc_sanitize_reset(>->uc); - if (err) - return err; - - err = xe_uc_init_hw(>->uc); - if (err) - return err; - - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) - xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); - - if (IS_SRIOV_PF(gt_to_xe(gt))) - xe_gt_sriov_pf_init_hw(gt); - - xe_mocs_init(gt); - err = xe_uc_start(>->uc); - if (err) - return err; - - for_each_hw_engine(hwe, gt, id) { - xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(hwe); - } - - /* Get CCS mode in sync between sw/hw */ - xe_gt_apply_ccs_mode(gt); - - /* Restore GT freq to expected values */ - xe_gt_sanitize_freq(gt); - - if (IS_SRIOV_PF(gt_to_xe(gt))) - xe_gt_sriov_pf_restart(gt); - - return 0; -} - -static int gt_reset(struct xe_gt *gt) -{ - int err; - - if (xe_device_wedged(gt_to_xe(gt))) - return -ECANCELED; - - /* We only support GT resets with GuC submission */ - if (!xe_device_uc_enabled(gt_to_xe(gt))) - return -ENODEV; - - xe_gt_info(gt, "reset started\n"); - - xe_pm_runtime_get(gt_to_xe(gt)); - - if (xe_fault_inject_gt_reset()) { - err = -ECANCELED; - goto err_fail; - } - - xe_gt_sanitize(gt); - - err = 
xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_msg; - - xe_uc_gucrc_disable(>->uc); - xe_uc_stop_prepare(>->uc); - xe_gt_pagefault_reset(gt); - - xe_uc_stop(>->uc); - - xe_gt_tlb_invalidation_reset(gt); - - err = do_gt_reset(gt); - if (err) - goto err_out; - - err = do_gt_restart(gt); - if (err) - goto err_out; - - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(err); - xe_pm_runtime_put(gt_to_xe(gt)); - - xe_gt_info(gt, "reset done\n"); - - return 0; - -err_out: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -err_msg: - XE_WARN_ON(xe_uc_start(>->uc)); -err_fail: - xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); - - xe_device_declare_wedged(gt_to_xe(gt)); - xe_pm_runtime_put(gt_to_xe(gt)); - - return err; -} - -static void gt_reset_worker(struct work_struct *w) -{ - struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); - - gt_reset(gt); -} - -void xe_gt_reset_async(struct xe_gt *gt) -{ - xe_gt_info(gt, "trying reset\n"); - - /* Don't do a reset while one is already in flight */ - if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(>->uc)) - return; - - xe_gt_info(gt, "reset queued\n"); - queue_work(gt->ordered_wq, >->reset.worker); -} - -void xe_gt_suspend_prepare(struct xe_gt *gt) -{ - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - - xe_uc_stop_prepare(>->uc); - - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -} - -int xe_gt_suspend(struct xe_gt *gt) -{ - int err; - - xe_gt_dbg(gt, "suspending\n"); - xe_gt_sanitize(gt); - - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_msg; - - err = xe_uc_suspend(>->uc); - if (err) - goto err_force_wake; - - xe_gt_idle_disable_pg(gt); - - xe_gt_disable_host_l2_vram(gt); - - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_gt_dbg(gt, "suspended\n"); - - return 0; - -err_force_wake: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -err_msg: - 
xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); - - return err; -} - -/** - * xe_gt_sanitize_freq() - Restore saved frequencies if necessary. - * @gt: the GT object - * - * Called after driver init/GSC load completes to restore GT frequencies if we - * limited them for any WAs. - */ -int xe_gt_sanitize_freq(struct xe_gt *gt) -{ - int ret = 0; - - if ((!xe_uc_fw_is_available(>->uc.gsc.fw) || - xe_uc_fw_is_loaded(>->uc.gsc.fw)) && XE_WA(gt, 22019338487)) - ret = xe_guc_pc_restore_stashed_freq(>->uc.guc.pc); - - return ret; -} - -int xe_gt_resume(struct xe_gt *gt) -{ - int err; - - xe_gt_dbg(gt, "resuming\n"); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_msg; - - err = do_gt_restart(gt); - if (err) - goto err_force_wake; - - xe_gt_idle_enable_pg(gt); - - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_gt_dbg(gt, "resumed\n"); - - return 0; - -err_force_wake: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -err_msg: - xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err)); - - return err; -} - -struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, - enum xe_engine_class class, - u16 instance, bool logical) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - for_each_hw_engine(hwe, gt, id) - if (hwe->class == class && - ((!logical && hwe->instance == instance) || - (logical && hwe->logical_instance == instance))) - return hwe; - - return NULL; -} - -struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, - enum xe_engine_class class) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - for_each_hw_engine(hwe, gt, id) { - switch (class) { - case XE_ENGINE_CLASS_RENDER: - case XE_ENGINE_CLASS_COMPUTE: - if (hwe->class == XE_ENGINE_CLASS_RENDER || - hwe->class == XE_ENGINE_CLASS_COMPUTE) - return hwe; - break; - default: - if (hwe->class == class) - return hwe; - } - } - - return NULL; -} - -struct xe_hw_engine *xe_gt_any_hw_engine(struct xe_gt *gt) -{ - 
struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - for_each_hw_engine(hwe, gt, id) - return hwe; - - return NULL; -} - -/** - * xe_gt_declare_wedged() - Declare GT wedged - * @gt: the GT object - * - * Wedge the GT which stops all submission, saves desired debug state, and - * cleans up anything which could timeout. - */ -void xe_gt_declare_wedged(struct xe_gt *gt) -{ - xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode); - - xe_uc_declare_wedged(>->uc); - xe_gt_tlb_invalidation_reset(gt); -} diff --git a/rr-cache/61b4d511b17c3c471878e1610fa36de4c26207ec/preimage b/rr-cache/61b4d511b17c3c471878e1610fa36de4c26207ec/preimage deleted file mode 100644 index d566b2e2c706..000000000000 --- a/rr-cache/61b4d511b17c3c471878e1610fa36de4c26207ec/preimage +++ /dev/null @@ -1,980 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_gt.h" - -#include <linux/minmax.h> - -#include <drm/drm_managed.h> -<<<<<<< -#include <drm/xe_drm.h> -======= -#include <uapi/drm/xe_drm.h> - -#include <generated/xe_wa_oob.h> ->>>>>>> - -#include <generated/xe_wa_oob.h> - -#include "instructions/xe_gfxpipe_commands.h" -#include "instructions/xe_mi_commands.h" -#include "regs/xe_gt_regs.h" -#include "xe_assert.h" -#include "xe_bb.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_exec_queue.h" -#include "xe_execlist.h" -#include "xe_force_wake.h" -#include "xe_ggtt.h" -#include "xe_gsc.h" -#include "xe_gt_ccs_mode.h" -#include "xe_gt_clock.h" -#include "xe_gt_freq.h" -#include "xe_gt_idle.h" -#include "xe_gt_mcr.h" -#include "xe_gt_pagefault.h" -#include "xe_gt_printk.h" -#include "xe_gt_sriov_pf.h" -#include "xe_gt_sysfs.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_gt_topology.h" -#include "xe_guc_exec_queue_types.h" -#include "xe_guc_pc.h" -#include "xe_hw_fence.h" -#include "xe_hw_engine_class_sysfs.h" -#include "xe_irq.h" -#include "xe_lmtt.h" -#include "xe_lrc.h" -#include "xe_map.h" -#include "xe_migrate.h" -#include 
"xe_mmio.h" -#include "xe_pat.h" -#include "xe_pm.h" -#include "xe_mocs.h" -#include "xe_reg_sr.h" -#include "xe_ring_ops.h" -#include "xe_sa.h" -#include "xe_sched_job.h" -#include "xe_sriov.h" -#include "xe_tuning.h" -#include "xe_uc.h" -#include "xe_uc_fw.h" -#include "xe_vm.h" -#include "xe_wa.h" -#include "xe_wopcm.h" - -static void gt_fini(struct drm_device *drm, void *arg) -{ - struct xe_gt *gt = arg; - - destroy_workqueue(gt->ordered_wq); -} - -struct xe_gt *xe_gt_alloc(struct xe_tile *tile) -{ - struct xe_gt *gt; - int err; - - gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL); - if (!gt) - return ERR_PTR(-ENOMEM); - - gt->tile = tile; - gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq", 0); - - err = drmm_add_action_or_reset(>_to_xe(gt)->drm, gt_fini, gt); - if (err) - return ERR_PTR(err); - - return gt; -} - -void xe_gt_sanitize(struct xe_gt *gt) -{ - /* - * FIXME: if xe_uc_sanitize is called here, on TGL driver will not - * reload - */ - gt->uc.guc.submission_state.enabled = false; -} - -static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) -{ - u32 reg; - int err; - - if (!XE_WA(gt, 16023588340)) - return; - - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (WARN_ON(err)) - return; - - if (!xe_gt_is_media_type(gt)) { - xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); - reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); - reg |= CG_DIS_CNTLBUS; - xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); - } - - xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -} - -static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) -{ - u32 reg; - int err; - - if (!XE_WA(gt, 16023588340)) - return; - - if (xe_gt_is_media_type(gt)) - return; - - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (WARN_ON(err)) - return; - - reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); - reg &= ~CG_DIS_CNTLBUS; - xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, 
reg); - - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -} - -/** - * xe_gt_remove() - Clean up the GT structures before driver removal - * @gt: the GT object - * - * This function should only act on objects/structures that must be cleaned - * before the driver removal callback is complete and therefore can't be - * deferred to a drmm action. - */ -void xe_gt_remove(struct xe_gt *gt) -{ - int i; - - xe_uc_remove(>->uc); - - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); - - xe_gt_disable_host_l2_vram(gt); -} - -static void gt_reset_worker(struct work_struct *w); - -static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) -{ - struct xe_sched_job *job; - struct xe_bb *bb; - struct dma_fence *fence; - long timeout; - - bb = xe_bb_new(gt, 4, false); - if (IS_ERR(bb)) - return PTR_ERR(bb); - - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); - return PTR_ERR(job); - } - - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); - - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); - xe_bb_free(bb, NULL); - if (timeout < 0) - return timeout; - else if (!timeout) - return -ETIME; - - return 0; -} - -/* - * Convert back from encoded value to type-safe, only to be used when reg.mcr - * is true - */ -static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg) -{ - return (const struct xe_reg_mcr){.__reg.raw = reg.raw }; -} - -static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) -{ - struct xe_reg_sr *sr = &q->hwe->reg_lrc; - struct xe_reg_sr_entry *entry; - unsigned long idx; - struct xe_sched_job *job; - struct xe_bb *bb; - struct dma_fence *fence; - long timeout; - int count = 0; - - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) - /* Big enough to emit all of the context's 3DSTATE */ - bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false); - else - /* Just pick a large BB size */ - bb = xe_bb_new(gt, SZ_4K, 
false); - - if (IS_ERR(bb)) - return PTR_ERR(bb); - - xa_for_each(&sr->xa, idx, entry) - ++count; - - if (count) { - xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); - - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); - - xa_for_each(&sr->xa, idx, entry) { - struct xe_reg reg = entry->reg; - struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg); - u32 val; - - /* - * Skip reading the register if it's not really needed - */ - if (reg.masked) - val = entry->clr_bits << 16; - else if (entry->clr_bits + 1) - val = (reg.mcr ? - xe_gt_mcr_unicast_read_any(gt, reg_mcr) : - xe_mmio_read32(gt, reg)) & (~entry->clr_bits); - else - val = 0; - - val |= entry->set_bits; - - bb->cs[bb->len++] = reg.addr; - bb->cs[bb->len++] = val; - xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val); - } - } - - xe_lrc_emit_hwe_state_instructions(q, bb); - - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); - return PTR_ERR(job); - } - - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); - - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); - xe_bb_free(bb, NULL); - if (timeout < 0) - return timeout; - else if (!timeout) - return -ETIME; - - return 0; -} - -int xe_gt_record_default_lrcs(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - int err = 0; - - for_each_hw_engine(hwe, gt, id) { - struct xe_exec_queue *q, *nop_q; - void *default_lrc; - - if (gt->default_lrc[hwe->class]) - continue; - - xe_reg_sr_init(&hwe->reg_lrc, hwe->name, xe); - xe_wa_process_lrc(hwe); - xe_hw_engine_setup_default_lrc_state(hwe); - xe_tuning_process_lrc(hwe); - - default_lrc = drmm_kzalloc(&xe->drm, - xe_gt_lrc_size(gt, hwe->class), - GFP_KERNEL); - if (!default_lrc) - return -ENOMEM; - - q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1, - hwe, EXEC_QUEUE_FLAG_KERNEL, 0); - if (IS_ERR(q)) { - err = PTR_ERR(q); - 
xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n", - hwe->name, q); - return err; - } - - /* Prime golden LRC with known good state */ - err = emit_wa_job(gt, q); - if (err) { - xe_gt_err(gt, "hwe %s: emit_wa_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), q->guc->id); - goto put_exec_queue; - } - - nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), - 1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0); - if (IS_ERR(nop_q)) { - err = PTR_ERR(nop_q); - xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n", - hwe->name, nop_q); - goto put_exec_queue; - } - - /* Switch to different LRC */ - err = emit_nop_job(gt, nop_q); - if (err) { - xe_gt_err(gt, "hwe %s: nop emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), nop_q->guc->id); - goto put_nop_q; - } - - /* Reload golden LRC to record the effect of any indirect W/A */ - err = emit_nop_job(gt, q); - if (err) { - xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), q->guc->id); - goto put_nop_q; - } - - xe_map_memcpy_from(xe, default_lrc, - &q->lrc[0]->bo->vmap, - xe_lrc_pphwsp_offset(q->lrc[0]), - xe_gt_lrc_size(gt, hwe->class)); - - gt->default_lrc[hwe->class] = default_lrc; -put_nop_q: - xe_exec_queue_put(nop_q); -put_exec_queue: - xe_exec_queue_put(q); - if (err) - break; - } - - return err; -} - -int xe_gt_init_early(struct xe_gt *gt) -{ - int err; - - if (IS_SRIOV_PF(gt_to_xe(gt))) { - err = xe_gt_sriov_pf_init_early(gt); - if (err) - return err; - } - - xe_reg_sr_init(>->reg_sr, "GT", gt_to_xe(gt)); - - err = xe_wa_init(gt); - if (err) - return err; - - xe_wa_process_gt(gt); - xe_wa_process_oob(gt); - xe_tuning_process_gt(gt); - - xe_force_wake_init_gt(gt, gt_to_fw(gt)); - spin_lock_init(>->global_invl_lock); - - return 0; -} - -static void dump_pat_on_error(struct xe_gt *gt) -{ - struct drm_printer p; - char prefix[32]; - - snprintf(prefix, sizeof(prefix), "[GT%u Error]", gt->info.id); - p = drm_dbg_printer(>_to_xe(gt)->drm, 
DRM_UT_DRIVER, prefix); - - xe_pat_dump(gt, &p); -} - -static int gt_fw_domain_init(struct xe_gt *gt) -{ - int err, i; - - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto err_hw_fence_irq; - - if (!xe_gt_is_media_type(gt)) { - err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); - if (err) - goto err_force_wake; - if (IS_SRIOV_PF(gt_to_xe(gt))) - xe_lmtt_init(>_to_tile(gt)->sriov.pf.lmtt); - } - - /* Enable per hw engine IRQs */ - xe_irq_enable_hwe(gt); - - /* Rerun MCR init as we now have hw engine list */ - xe_gt_mcr_init(gt); - - err = xe_hw_engines_init_early(gt); - if (err) - goto err_force_wake; - - err = xe_hw_engine_class_sysfs_init(gt); - if (err) - goto err_force_wake; - - /* Initialize CCS mode sysfs after early initialization of HW engines */ - err = xe_gt_ccs_mode_sysfs_init(gt); - if (err) - goto err_force_wake; - - /* - * Stash hardware-reported version. Since this register does not exist - * on pre-MTL platforms, reading it there will (correctly) return 0. - */ - gt->info.gmdid = xe_mmio_read32(gt, GMD_ID); - - err = xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - XE_WARN_ON(err); - - return 0; - -err_force_wake: - dump_pat_on_error(gt); - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -err_hw_fence_irq: - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); - - return err; -} - -static int all_fw_domain_init(struct xe_gt *gt) -{ - int err, i; - - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_hw_fence_irq; - - xe_gt_mcr_set_implicit_defaults(gt); - xe_reg_sr_apply_mmio(>->reg_sr, gt); - - err = xe_gt_clock_init(gt); - if (err) - goto err_force_wake; - - xe_mocs_init(gt); - err = xe_execlist_init(gt); - if (err) - goto err_force_wake; - - err = xe_hw_engines_init(gt); - if (err) - goto err_force_wake; - - err = xe_uc_init_post_hwconfig(>->uc); - if (err) - goto err_force_wake; - - if (!xe_gt_is_media_type(gt)) { - /* - * USM has its only SA pool to non-block behind user operations 
- */ - if (gt_to_xe(gt)->info.has_usm) { - struct xe_device *xe = gt_to_xe(gt); - - gt->usm.bb_pool = xe_sa_bo_manager_init(gt_to_tile(gt), - IS_DGFX(xe) ? SZ_1M : SZ_512K, 16); - if (IS_ERR(gt->usm.bb_pool)) { - err = PTR_ERR(gt->usm.bb_pool); - goto err_force_wake; - } - } - } - - if (!xe_gt_is_media_type(gt)) { - struct xe_tile *tile = gt_to_tile(gt); - - tile->migrate = xe_migrate_init(tile); - if (IS_ERR(tile->migrate)) { - err = PTR_ERR(tile->migrate); - goto err_force_wake; - } - } - - err = xe_uc_init_hw(>->uc); - if (err) - goto err_force_wake; - - /* Configure default CCS mode of 1 engine with all resources */ - if (xe_gt_ccs_mode_enabled(gt)) { - gt->ccs_mode = 1; - xe_gt_apply_ccs_mode(gt); - } - - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) - xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); - - if (IS_SRIOV_PF(gt_to_xe(gt))) - xe_gt_sriov_pf_init_hw(gt); - - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(err); - - return 0; - -err_force_wake: - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); -err_hw_fence_irq: - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) - xe_hw_fence_irq_finish(>->fence_irq[i]); - - return err; -} - -/* - * Initialize enough GT to be able to load GuC in order to obtain hwconfig and - * enable CTB communication. 
- */ -int xe_gt_init_hwconfig(struct xe_gt *gt) -{ - int err; - - err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (err) - goto out; - - xe_gt_mcr_init_early(gt); - xe_pat_init(gt); - - err = xe_uc_init(>->uc); - if (err) - goto out_fw; - - err = xe_uc_init_hwconfig(>->uc); - if (err) - goto out_fw; - - xe_gt_topology_init(gt); - xe_gt_mcr_init(gt); - xe_gt_enable_host_l2_vram(gt); - -out_fw: - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); -out: - return err; -} - -int xe_gt_init(struct xe_gt *gt) -{ - int err; - int i; - - INIT_WORK(>->reset.worker, gt_reset_worker); - - for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) { - gt->ring_ops[i] = xe_ring_ops_get(gt, i); - xe_hw_fence_irq_init(>->fence_irq[i]); - } - - err = xe_gt_tlb_invalidation_init(gt); - if (err) - return err; - - err = xe_gt_pagefault_init(gt); - if (err) - return err; - - xe_mocs_init_early(gt); - - err = xe_gt_sysfs_init(gt); - if (err) - return err; - - err = gt_fw_domain_init(gt); - if (err) - return err; - - err = xe_gt_idle_init(>->gtidle); - if (err) - return err; - - err = xe_gt_freq_init(gt); - if (err) - return err; - - xe_force_wake_init_engines(gt, gt_to_fw(gt)); - - err = all_fw_domain_init(gt); - if (err) - return err; - - xe_gt_record_user_engines(gt); - - return 0; -} - -void xe_gt_record_user_engines(struct xe_gt *gt) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - gt->user_engines.mask = 0; - memset(gt->user_engines.instances_per_class, 0, - sizeof(gt->user_engines.instances_per_class)); - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - gt->user_engines.mask |= BIT_ULL(id); - gt->user_engines.instances_per_class[hwe->class]++; - } - - xe_gt_assert(gt, (gt->user_engines.mask | gt->info.engine_mask) - == gt->info.engine_mask); -} - -static int do_gt_reset(struct xe_gt *gt) -{ - int err; - - xe_gsc_wa_14015076503(gt, true); - - xe_mmio_write32(gt, GDRST, GRDOM_FULL); - err = xe_mmio_wait32(gt, GDRST, GRDOM_FULL, 0, 5000, NULL, false); 
- if (err) - xe_gt_err(gt, "failed to clear GRDOM_FULL (%pe)\n", - ERR_PTR(err)); - - xe_gsc_wa_14015076503(gt, false); - - return err; -} - -static int vf_gt_restart(struct xe_gt *gt) -{ - int err; - - err = xe_uc_sanitize_reset(>->uc); - if (err) - return err; - - err = xe_uc_init_hw(>->uc); - if (err) - return err; - - err = xe_uc_start(>->uc); - if (err) - return err; - - return 0; -} - -static int do_gt_restart(struct xe_gt *gt) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - int err; - - if (IS_SRIOV_VF(gt_to_xe(gt))) - return vf_gt_restart(gt); - - xe_pat_init(gt); - - xe_gt_enable_host_l2_vram(gt); - - xe_gt_mcr_set_implicit_defaults(gt); - xe_reg_sr_apply_mmio(>->reg_sr, gt); - - err = xe_wopcm_init(>->uc.wopcm); - if (err) - return err; - - for_each_hw_engine(hwe, gt, id) - xe_hw_engine_enable_ring(hwe); - - err = xe_uc_sanitize_reset(>->uc); - if (err) - return err; - - err = xe_uc_init_hw(>->uc); - if (err) - return err; - - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) - xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); - - if (IS_SRIOV_PF(gt_to_xe(gt))) - xe_gt_sriov_pf_init_hw(gt); - - xe_mocs_init(gt); - err = xe_uc_start(>->uc); - if (err) - return err; - - for_each_hw_engine(hwe, gt, id) { - xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(hwe); - } - - /* Get CCS mode in sync between sw/hw */ - xe_gt_apply_ccs_mode(gt); - - /* Restore GT freq to expected values */ - xe_gt_sanitize_freq(gt); - - if (IS_SRIOV_PF(gt_to_xe(gt))) - xe_gt_sriov_pf_restart(gt); - - return 0; -} - -static int gt_reset(struct xe_gt *gt) -{ - int err; - - if (xe_device_wedged(gt_to_xe(gt))) - return -ECANCELED; - - /* We only support GT resets with GuC submission */ - if (!xe_device_uc_enabled(gt_to_xe(gt))) - return -ENODEV; - - xe_gt_info(gt, "reset started\n"); - - xe_pm_runtime_get(gt_to_xe(gt)); - - if (xe_fault_inject_gt_reset()) { - err = -ECANCELED; - goto err_fail; - } - - xe_gt_sanitize(gt); - - err = 
xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_msg; - - xe_uc_gucrc_disable(>->uc); - xe_uc_stop_prepare(>->uc); - xe_gt_pagefault_reset(gt); - - xe_uc_stop(>->uc); - - xe_gt_tlb_invalidation_reset(gt); - - err = do_gt_reset(gt); - if (err) - goto err_out; - - err = do_gt_restart(gt); - if (err) - goto err_out; - - err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - XE_WARN_ON(err); - xe_pm_runtime_put(gt_to_xe(gt)); - - xe_gt_info(gt, "reset done\n"); - - return 0; - -err_out: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -err_msg: - XE_WARN_ON(xe_uc_start(>->uc)); -err_fail: - xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); - - xe_device_declare_wedged(gt_to_xe(gt)); - xe_pm_runtime_put(gt_to_xe(gt)); - - return err; -} - -static void gt_reset_worker(struct work_struct *w) -{ - struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker); - - gt_reset(gt); -} - -void xe_gt_reset_async(struct xe_gt *gt) -{ - xe_gt_info(gt, "trying reset\n"); - - /* Don't do a reset while one is already in flight */ - if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(>->uc)) - return; - - xe_gt_info(gt, "reset queued\n"); - queue_work(gt->ordered_wq, >->reset.worker); -} - -void xe_gt_suspend_prepare(struct xe_gt *gt) -{ - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - - xe_uc_stop_prepare(>->uc); - - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -} - -int xe_gt_suspend(struct xe_gt *gt) -{ - int err; - - xe_gt_dbg(gt, "suspending\n"); - xe_gt_sanitize(gt); - - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_msg; - - err = xe_uc_suspend(>->uc); - if (err) - goto err_force_wake; - - xe_gt_idle_disable_pg(gt); - - xe_gt_disable_host_l2_vram(gt); - - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_gt_dbg(gt, "suspended\n"); - - return 0; - -err_force_wake: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -err_msg: - 
xe_gt_err(gt, "suspend failed (%pe)\n", ERR_PTR(err)); - - return err; -} - -/** - * xe_gt_sanitize_freq() - Restore saved frequencies if necessary. - * @gt: the GT object - * - * Called after driver init/GSC load completes to restore GT frequencies if we - * limited them for any WAs. - */ -int xe_gt_sanitize_freq(struct xe_gt *gt) -{ - int ret = 0; - - if ((!xe_uc_fw_is_available(>->uc.gsc.fw) || - xe_uc_fw_is_loaded(>->uc.gsc.fw)) && XE_WA(gt, 22019338487)) - ret = xe_guc_pc_restore_stashed_freq(>->uc.guc.pc); - - return ret; -} - -int xe_gt_resume(struct xe_gt *gt) -{ - int err; - - xe_gt_dbg(gt, "resuming\n"); - err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (err) - goto err_msg; - - err = do_gt_restart(gt); - if (err) - goto err_force_wake; - - xe_gt_idle_enable_pg(gt); - - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_gt_dbg(gt, "resumed\n"); - - return 0; - -err_force_wake: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); -err_msg: - xe_gt_err(gt, "resume failed (%pe)\n", ERR_PTR(err)); - - return err; -} - -struct xe_hw_engine *xe_gt_hw_engine(struct xe_gt *gt, - enum xe_engine_class class, - u16 instance, bool logical) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - for_each_hw_engine(hwe, gt, id) - if (hwe->class == class && - ((!logical && hwe->instance == instance) || - (logical && hwe->logical_instance == instance))) - return hwe; - - return NULL; -} - -struct xe_hw_engine *xe_gt_any_hw_engine_by_reset_domain(struct xe_gt *gt, - enum xe_engine_class class) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - for_each_hw_engine(hwe, gt, id) { - switch (class) { - case XE_ENGINE_CLASS_RENDER: - case XE_ENGINE_CLASS_COMPUTE: - if (hwe->class == XE_ENGINE_CLASS_RENDER || - hwe->class == XE_ENGINE_CLASS_COMPUTE) - return hwe; - break; - default: - if (hwe->class == class) - return hwe; - } - } - - return NULL; -} - -struct xe_hw_engine *xe_gt_any_hw_engine(struct xe_gt *gt) -{ - 
struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - for_each_hw_engine(hwe, gt, id) - return hwe; - - return NULL; -} - -/** - * xe_gt_declare_wedged() - Declare GT wedged - * @gt: the GT object - * - * Wedge the GT which stops all submission, saves desired debug state, and - * cleans up anything which could timeout. - */ -void xe_gt_declare_wedged(struct xe_gt *gt) -{ - xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode); - - xe_uc_declare_wedged(>->uc); - xe_gt_tlb_invalidation_reset(gt); -} diff --git a/rr-cache/6e989852f5454d81ebf331bbd2c55116dc711575/preimage b/rr-cache/6e989852f5454d81ebf331bbd2c55116dc711575/preimage deleted file mode 100644 index 42ffb0f8cccc..000000000000 --- a/rr-cache/6e989852f5454d81ebf331bbd2c55116dc711575/preimage +++ /dev/null @@ -1,982 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include "xe_exec_queue.h" - -#include <linux/nospec.h> - -#include <drm/drm_device.h> -#include <drm/drm_file.h> -#include <drm/xe_drm.h> - -#include "xe_device.h" -#include "xe_gt.h" -#include "xe_hw_engine_class_sysfs.h" -#include "xe_hw_fence.h" -#include "xe_lrc.h" -#include "xe_macros.h" -#include "xe_migrate.h" -#include "xe_pm.h" -#include "xe_ring_ops_types.h" -#include "xe_trace.h" -#include "xe_vm.h" - -enum xe_exec_queue_sched_prop { - XE_EXEC_QUEUE_JOB_TIMEOUT = 0, - XE_EXEC_QUEUE_TIMESLICE = 1, - XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2, - XE_EXEC_QUEUE_SCHED_PROP_MAX = 3, -}; - -static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number); - -static void __xe_exec_queue_free(struct xe_exec_queue *q) -{ - if (q->vm) - xe_vm_put(q->vm); - - if (q->xef) - xe_file_put(q->xef); - - kfree(q); -} - -static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, - struct xe_vm *vm, - u32 logical_mask, - u16 width, struct xe_hw_engine *hwe, - u32 flags, u64 extensions) -{ - struct xe_exec_queue *q; - struct xe_gt *gt = hwe->gt; - int err; - - /* 
only kernel queues can be permanent */ - XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL)); - - q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL); - if (!q) - return ERR_PTR(-ENOMEM); - - kref_init(&q->refcount); - q->flags = flags; - q->hwe = hwe; - q->gt = gt; - q->class = hwe->class; - q->width = width; - q->logical_mask = logical_mask; - q->fence_irq = >->fence_irq[hwe->class]; - q->ring_ops = gt->ring_ops[hwe->class]; - q->ops = gt->exec_queue_ops; - INIT_LIST_HEAD(&q->lr.link); - INIT_LIST_HEAD(&q->multi_gt_link); - - q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; - q->sched_props.preempt_timeout_us = - hwe->eclass->sched_props.preempt_timeout_us; - q->sched_props.job_timeout_ms = - hwe->eclass->sched_props.job_timeout_ms; - if (q->flags & EXEC_QUEUE_FLAG_KERNEL && - q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY) - q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; - else - q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; - - if (vm) - q->vm = xe_vm_get(vm); - - if (extensions) { - /* - * may set q->usm, must come before xe_lrc_create(), - * may overwrite q->sched_props, must come before q->ops->init() - */ - err = exec_queue_user_extensions(xe, q, extensions, 0); - if (err) { - __xe_exec_queue_free(q); - return ERR_PTR(err); - } - } - - return q; -} - -static int __xe_exec_queue_init(struct xe_exec_queue *q) -{ - struct xe_vm *vm = q->vm; - int i, err; - - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - return err; - } - - for (i = 0; i < q->width; ++i) { - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); - if (IS_ERR(q->lrc[i])) { - err = PTR_ERR(q->lrc[i]); - goto err_unlock; - } - } - - if (vm) - xe_vm_unlock(vm); - - err = q->ops->init(q); - if (err) - goto err_lrc; - - return 0; - -err_unlock: - if (vm) - xe_vm_unlock(vm); -err_lrc: - for (i = i - 1; i >= 0; --i) - xe_lrc_put(q->lrc[i]); - return err; -} - -struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct 
xe_vm *vm, - u32 logical_mask, u16 width, - struct xe_hw_engine *hwe, u32 flags, - u64 extensions) -{ - struct xe_exec_queue *q; - int err; - - q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags, - extensions); - if (IS_ERR(q)) - return q; - - err = __xe_exec_queue_init(q); - if (err) - goto err_post_alloc; - - return q; - -err_post_alloc: - __xe_exec_queue_free(q); - return ERR_PTR(err); -} - -struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, - struct xe_vm *vm, - enum xe_engine_class class, - u32 flags, u64 extensions) -{ - struct xe_hw_engine *hwe, *hwe0 = NULL; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == class) { - logical_mask |= BIT(hwe->logical_instance); - if (!hwe0) - hwe0 = hwe; - } - } - - if (!logical_mask) - return ERR_PTR(-ENODEV); - - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions); -} - -/** - * xe_exec_queue_create_bind() - Create bind exec queue. - * @xe: Xe device. - * @tile: tile which bind exec queue belongs to. - * @flags: exec queue creation flags - * @extensions: exec queue creation extensions - * - * Normalize bind exec queue creation. Bind exec queue is tied to migration VM - * for access to physical memory required for page table programming. On a - * faulting devices the reserved copy engine instance must be used to avoid - * deadlocking (user binds cannot get stuck behind faults as kernel binds which - * resolve faults depend on user binds). On non-faulting devices any copy engine - * can be used. 
- * - * Returns exec queue on success, ERR_PTR on failure - */ -struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, - struct xe_tile *tile, - u32 flags, u64 extensions) -{ - struct xe_gt *gt = tile->primary_gt; - struct xe_exec_queue *q; - struct xe_vm *migrate_vm; - - migrate_vm = xe_migrate_get_vm(tile->migrate); - if (xe->info.has_usm) { - struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, - XE_ENGINE_CLASS_COPY, - gt->usm.reserved_bcs_instance, - false); - - if (!hwe) - return ERR_PTR(-EINVAL); - - q = xe_exec_queue_create(xe, migrate_vm, - BIT(hwe->logical_instance), 1, hwe, - flags, extensions); - } else { - q = xe_exec_queue_create_class(xe, gt, migrate_vm, - XE_ENGINE_CLASS_COPY, flags, - extensions); - } - xe_vm_put(migrate_vm); - - return q; -} - -void xe_exec_queue_destroy(struct kref *ref) -{ - struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); - struct xe_exec_queue *eq, *next; - - xe_exec_queue_last_fence_put_unlocked(q); - if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { - list_for_each_entry_safe(eq, next, &q->multi_gt_list, - multi_gt_link) - xe_exec_queue_put(eq); - } - - q->ops->fini(q); -} - -void xe_exec_queue_fini(struct xe_exec_queue *q) -{ - int i; - - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); - __xe_exec_queue_free(q); -} - -void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) -{ - switch (q->class) { - case XE_ENGINE_CLASS_RENDER: - snprintf(q->name, sizeof(q->name), "rcs%d", instance); - break; - case XE_ENGINE_CLASS_VIDEO_DECODE: - snprintf(q->name, sizeof(q->name), "vcs%d", instance); - break; - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - snprintf(q->name, sizeof(q->name), "vecs%d", instance); - break; - case XE_ENGINE_CLASS_COPY: - snprintf(q->name, sizeof(q->name), "bcs%d", instance); - break; - case XE_ENGINE_CLASS_COMPUTE: - snprintf(q->name, sizeof(q->name), "ccs%d", instance); - break; - case XE_ENGINE_CLASS_OTHER: - snprintf(q->name, sizeof(q->name), 
"gsccs%d", instance); - break; - default: - XE_WARN_ON(q->class); - } -} - -struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id) -{ - struct xe_exec_queue *q; - - mutex_lock(&xef->exec_queue.lock); - q = xa_load(&xef->exec_queue.xa, id); - if (q) - xe_exec_queue_get(q); - mutex_unlock(&xef->exec_queue.lock); - - return q; -} - -enum xe_exec_queue_priority -xe_exec_queue_device_get_max_priority(struct xe_device *xe) -{ - return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH : - XE_EXEC_QUEUE_PRIORITY_NORMAL; -} - -static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q, - u64 value) -{ - if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe))) - return -EPERM; - - q->sched_props.priority = value; - return 0; -} - -static bool xe_exec_queue_enforce_schedule_limit(void) -{ -#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) - return true; -#else - return !capable(CAP_SYS_NICE); -#endif -} - -static void -xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass, - enum xe_exec_queue_sched_prop prop, - u32 *min, u32 *max) -{ - switch (prop) { - case XE_EXEC_QUEUE_JOB_TIMEOUT: - *min = eclass->sched_props.job_timeout_min; - *max = eclass->sched_props.job_timeout_max; - break; - case XE_EXEC_QUEUE_TIMESLICE: - *min = eclass->sched_props.timeslice_min; - *max = eclass->sched_props.timeslice_max; - break; - case XE_EXEC_QUEUE_PREEMPT_TIMEOUT: - *min = eclass->sched_props.preempt_timeout_min; - *max = eclass->sched_props.preempt_timeout_max; - break; - default: - break; - } -#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) - if (capable(CAP_SYS_NICE)) { - switch (prop) { - case XE_EXEC_QUEUE_JOB_TIMEOUT: - *min = XE_HW_ENGINE_JOB_TIMEOUT_MIN; - *max = XE_HW_ENGINE_JOB_TIMEOUT_MAX; - break; - case XE_EXEC_QUEUE_TIMESLICE: - *min = XE_HW_ENGINE_TIMESLICE_MIN; - *max = XE_HW_ENGINE_TIMESLICE_MAX; - break; - 
case XE_EXEC_QUEUE_PREEMPT_TIMEOUT: - *min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; - *max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; - break; - default: - break; - } - } -#endif -} - -static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q, - u64 value) -{ - u32 min = 0, max = 0; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_TIMESLICE, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - q->sched_props.timeslice_us = value; - return 0; -} - -typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, - struct xe_exec_queue *q, - u64 value); - -static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, -}; - -static int exec_queue_user_ext_set_property(struct xe_device *xe, - struct xe_exec_queue *q, - u64 extension) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_set_property ext; - int err; - u32 idx; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, ext.property >= - ARRAY_SIZE(exec_queue_set_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad) || - XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && - ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) - return -EINVAL; - - idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); - if (!exec_queue_set_property_funcs[idx]) - return -EINVAL; - - return exec_queue_set_property_funcs[idx](xe, q, ext.value); -} - -typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe, - struct xe_exec_queue *q, - u64 extension); - -static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = { - [DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = 
exec_queue_user_ext_set_property, -}; - -#define MAX_USER_EXTENSIONS 16 -static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number) -{ - u64 __user *address = u64_to_user_ptr(extensions); - struct drm_xe_user_extension ext; - int err; - u32 idx; - - if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) - return -E2BIG; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, ext.pad) || - XE_IOCTL_DBG(xe, ext.name >= - ARRAY_SIZE(exec_queue_user_extension_funcs))) - return -EINVAL; - - idx = array_index_nospec(ext.name, - ARRAY_SIZE(exec_queue_user_extension_funcs)); - err = exec_queue_user_extension_funcs[idx](xe, q, extensions); - if (XE_IOCTL_DBG(xe, err)) - return err; - - if (ext.next_extension) - return exec_queue_user_extensions(xe, q, ext.next_extension, - ++ext_number); - - return 0; -} - -<<<<<<< -======= -static const enum xe_engine_class user_to_xe_engine_class[] = { - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, -}; - -static struct xe_hw_engine * -find_hw_engine(struct xe_device *xe, - struct drm_xe_engine_class_instance eci) -{ - u32 idx; - - if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) - return NULL; - - if (eci.gt_id >= xe->info.gt_count) - return NULL; - - idx = array_index_nospec(eci.engine_class, - ARRAY_SIZE(user_to_xe_engine_class)); - - return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), - user_to_xe_engine_class[idx], - eci.engine_instance, true); -} - -static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - struct xe_hw_engine 
*hwe; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - if (XE_IOCTL_DBG(xe, width != 1)) - return 0; - if (XE_IOCTL_DBG(xe, num_placements != 1)) - return 0; - if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return 0; - - eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) - logical_mask |= BIT(hwe->logical_instance); - } - - return logical_mask; -} - ->>>>>>> -static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - int len = width * num_placements; - int i, j, n; - u16 class; - u16 gt_id; - u32 return_mask = 0, prev_mask; - - if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) && - len > 1)) - return 0; - - for (i = 0; i < width; ++i) { - u32 current_mask = 0; - - for (j = 0; j < num_placements; ++j) { - struct xe_hw_engine *hwe; - - n = j * width + i; - - hwe = find_hw_engine(xe, eci[n]); - if (XE_IOCTL_DBG(xe, !hwe)) - return 0; - - if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe))) - return 0; - - if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) || - XE_IOCTL_DBG(xe, n && eci[n].engine_class != class)) - return 0; - - class = eci[n].engine_class; - gt_id = eci[n].gt_id; - - if (width == 1 || !i) - return_mask |= BIT(eci[n].engine_instance); - current_mask |= BIT(eci[n].engine_instance); - } - - /* Parallel submissions must be logically contiguous */ - if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1)) - return 0; - - prev_mask = current_mask; - } - - return return_mask; -} - -int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_exec_queue_create *args = data; - struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; - struct 
drm_xe_engine_class_instance __user *user_eci = - u64_to_user_ptr(args->instances); - struct xe_hw_engine *hwe; - struct xe_vm *vm; - struct xe_gt *gt; - struct xe_tile *tile; - struct xe_exec_queue *q = NULL; - u32 logical_mask; - u32 id; - u32 len; - int err; - - if (XE_IOCTL_DBG(xe, args->flags) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - len = args->width * args->num_placements; - if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) - return -EINVAL; - - err = __copy_from_user(eci, user_eci, - sizeof(struct drm_xe_engine_class_instance) * - len); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) - return -EINVAL; - - if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { - if (XE_IOCTL_DBG(xe, args->width != 1) || - XE_IOCTL_DBG(xe, args->num_placements != 1) || - XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return -EINVAL; - - for_each_tile(tile, xe, id) { - struct xe_exec_queue *new; - u32 flags = EXEC_QUEUE_FLAG_VM; - - if (id) - flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; - -<<<<<<< - eci[0].gt_id = gt->info.id; - logical_mask = bind_exec_queue_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - /* The migration vm doesn't hold rpm ref */ - xe_pm_runtime_get_noresume(xe); - - flags = EXEC_QUEUE_FLAG_VM | (id ? 
EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); - - migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); - new = xe_exec_queue_create(xe, migrate_vm, logical_mask, - args->width, hwe, flags, - args->extensions); - - xe_pm_runtime_put(xe); /* now held by engine */ - - xe_vm_put(migrate_vm); -======= - new = xe_exec_queue_create_bind(xe, tile, flags, - args->extensions); ->>>>>>> - if (IS_ERR(new)) { - err = PTR_ERR(new); - if (q) - goto put_exec_queue; - return err; - } - if (id == 0) - q = new; - else - list_add_tail(&new->multi_gt_list, - &q->multi_gt_link); - } - } else { - gt = xe_device_get_gt(xe, eci[0].gt_id); - logical_mask = calc_validate_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_DBG(xe, !vm)) - return -ENOENT; - - err = down_read_interruptible(&vm->lock); - if (err) { - xe_vm_put(vm); - return err; - } - - if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { - up_read(&vm->lock); - xe_vm_put(vm); - return -ENOENT; - } - - q = xe_exec_queue_create(xe, vm, logical_mask, - args->width, hwe, 0, - args->extensions); - up_read(&vm->lock); - xe_vm_put(vm); - if (IS_ERR(q)) - return PTR_ERR(q); - - if (xe_vm_in_preempt_fence_mode(vm)) { - q->lr.context = dma_fence_context_alloc(1); - - err = xe_vm_add_compute_exec_queue(vm, q); - if (XE_IOCTL_DBG(xe, err)) - goto put_exec_queue; - } - } - - mutex_lock(&xef->exec_queue.lock); - err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->exec_queue.lock); - if (err) - goto kill_exec_queue; - - args->exec_queue_id = id; - q->xef = xe_file_get(xef); - - return 0; - -kill_exec_queue: - xe_exec_queue_kill(q); -put_exec_queue: - xe_exec_queue_put(q); - return err; -} - -int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - 
struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_exec_queue_get_property *args = data; - struct xe_exec_queue *q; - int ret; - - if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - q = xe_exec_queue_lookup(xef, args->exec_queue_id); - if (XE_IOCTL_DBG(xe, !q)) - return -ENOENT; - - switch (args->property) { - case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN: - args->value = q->ops->reset_status(q); - ret = 0; - break; - default: - ret = -EINVAL; - } - - xe_exec_queue_put(q); - - return ret; -} - -/** - * xe_exec_queue_is_lr() - Whether an exec_queue is long-running - * @q: The exec_queue - * - * Return: True if the exec_queue is long-running, false otherwise. - */ -bool xe_exec_queue_is_lr(struct xe_exec_queue *q) -{ - return q->vm && xe_vm_in_lr_mode(q->vm) && - !(q->flags & EXEC_QUEUE_FLAG_VM); -} - -static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q) -{ - return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1; -} - -/** - * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full - * @q: The exec_queue - * - * Return: True if the exec_queue's ring is full, false otherwise. - */ -bool xe_exec_queue_ring_full(struct xe_exec_queue *q) -{ - struct xe_lrc *lrc = q->lrc[0]; - s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES; - - return xe_exec_queue_num_job_inflight(q) >= max_job; -} - -/** - * xe_exec_queue_is_idle() - Whether an exec_queue is idle. - * @q: The exec_queue - * - * FIXME: Need to determine what to use as the short-lived - * timeline lock for the exec_queues, so that the return value - * of this function becomes more than just an advisory - * snapshot in time. The timeline lock must protect the - * seqno from racing submissions on the same exec_queue. - * Typically vm->resv, but user-created timeline locks use the migrate vm - * and never grabs the migrate vm->resv so we have a race there. 
- * - * Return: True if the exec_queue is idle, false otherwise. - */ -bool xe_exec_queue_is_idle(struct xe_exec_queue *q) -{ - if (xe_exec_queue_is_parallel(q)) { - int i; - - for (i = 0; i < q->width; ++i) { - if (xe_lrc_seqno(q->lrc[i]) != - q->lrc[i]->fence_ctx.next_seqno - 1) - return false; - } - - return true; - } - - return xe_lrc_seqno(q->lrc[0]) == - q->lrc[0]->fence_ctx.next_seqno - 1; -} - -/** - * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue - * from hw - * @q: The exec queue - * - * Update the timestamp saved by HW for this exec queue and save run ticks - * calculated by using the delta from last update. - */ -void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) -{ - struct xe_file *xef; - struct xe_lrc *lrc; - u32 old_ts, new_ts; - - /* - * Jobs that are run during driver load may use an exec_queue, but are - * not associated with a user xe file, so avoid accumulating busyness - * for kernel specific work. - */ - if (!q->vm || !q->vm->xef) - return; - - xef = q->vm->xef; - - /* - * Only sample the first LRC. For parallel submission, all of them are - * scheduled together and we compensate that below by multiplying by - * width - this may introduce errors if that premise is not true and - * they don't exit 100% aligned. On the other hand, looping through - * the LRCs and reading them in different time could also introduce - * errors. 
- */ - lrc = q->lrc[0]; - new_ts = xe_lrc_update_timestamp(lrc, &old_ts); - xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; -} - -void xe_exec_queue_kill(struct xe_exec_queue *q) -{ - struct xe_exec_queue *eq = q, *next; - - list_for_each_entry_safe(eq, next, &eq->multi_gt_list, - multi_gt_link) { - q->ops->kill(eq); - xe_vm_remove_compute_exec_queue(q->vm, eq); - } - - q->ops->kill(q); - xe_vm_remove_compute_exec_queue(q->vm, q); -} - -int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_exec_queue_destroy *args = data; - struct xe_exec_queue *q; - - if (XE_IOCTL_DBG(xe, args->pad) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - mutex_lock(&xef->exec_queue.lock); - q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id); - mutex_unlock(&xef->exec_queue.lock); - if (XE_IOCTL_DBG(xe, !q)) - return -ENOENT; - - xe_exec_queue_kill(q); - - trace_xe_exec_queue_close(q); - xe_exec_queue_put(q); - - return 0; -} - -static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, - struct xe_vm *vm) -{ - if (q->flags & EXEC_QUEUE_FLAG_VM) - lockdep_assert_held(&vm->lock); - else - xe_vm_assert_held(vm); -} - -/** - * xe_exec_queue_last_fence_put() - Drop ref to last fence - * @q: The exec queue - * @vm: The VM the engine does a bind or exec for - */ -void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm) -{ - xe_exec_queue_last_fence_lockdep_assert(q, vm); - - if (q->last_fence) { - dma_fence_put(q->last_fence); - q->last_fence = NULL; - } -} - -/** - * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked - * @q: The exec queue - * - * Only safe to be called from xe_exec_queue_destroy(). 
- */ -void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q) -{ - if (q->last_fence) { - dma_fence_put(q->last_fence); - q->last_fence = NULL; - } -} - -/** - * xe_exec_queue_last_fence_get() - Get last fence - * @q: The exec queue - * @vm: The VM the engine does a bind or exec for - * - * Get last fence, takes a ref - * - * Returns: last fence if not signaled, dma fence stub if signaled - */ -struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, - struct xe_vm *vm) -{ - struct dma_fence *fence; - - xe_exec_queue_last_fence_lockdep_assert(q, vm); - - if (q->last_fence && - test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) - xe_exec_queue_last_fence_put(q, vm); - - fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); - dma_fence_get(fence); - return fence; -} - -/** - * xe_exec_queue_last_fence_set() - Set last fence - * @q: The exec queue - * @vm: The VM the engine does a bind or exec for - * @fence: The fence - * - * Set the last fence for the engine. Increases reference count for fence, when - * closing engine xe_exec_queue_last_fence_put should be called. - */ -void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm, - struct dma_fence *fence) -{ - xe_exec_queue_last_fence_lockdep_assert(q, vm); - - xe_exec_queue_last_fence_put(q, vm); - q->last_fence = dma_fence_get(fence); -} diff --git a/rr-cache/96c1a0fb8618e58ea8a7af4351f2c7afbd4142f8/postimage b/rr-cache/96c1a0fb8618e58ea8a7af4351f2c7afbd4142f8/postimage deleted file mode 100644 index f27a2c75b56d..000000000000 --- a/rr-cache/96c1a0fb8618e58ea8a7af4351f2c7afbd4142f8/postimage +++ /dev/null @@ -1,121 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ -#ifndef _XE_I915_DRV_H_ -#define _XE_I915_DRV_H_ - -/* - * "Adaptation header" to allow i915 display to also build for xe driver. 
- * TODO: refactor i915 and xe so this can cease to exist - */ - -#include <drm/drm_drv.h> - -#include "i915_utils.h" -#include "intel_runtime_pm.h" -#include "xe_device_types.h" - -static inline struct drm_i915_private *to_i915(const struct drm_device *dev) -{ - return container_of(dev, struct drm_i915_private, drm); -} - -#define IS_PLATFORM(xe, x) ((xe)->info.platform == x) -#define INTEL_INFO(dev_priv) (&((dev_priv)->info)) -#define IS_I830(dev_priv) (dev_priv && 0) -#define IS_I845G(dev_priv) (dev_priv && 0) -#define IS_I85X(dev_priv) (dev_priv && 0) -#define IS_I865G(dev_priv) (dev_priv && 0) -#define IS_I915G(dev_priv) (dev_priv && 0) -#define IS_I915GM(dev_priv) (dev_priv && 0) -#define IS_I945G(dev_priv) (dev_priv && 0) -#define IS_I945GM(dev_priv) (dev_priv && 0) -#define IS_I965G(dev_priv) (dev_priv && 0) -#define IS_I965GM(dev_priv) (dev_priv && 0) -#define IS_G45(dev_priv) (dev_priv && 0) -#define IS_GM45(dev_priv) (dev_priv && 0) -#define IS_G4X(dev_priv) (dev_priv && 0) -#define IS_PINEVIEW(dev_priv) (dev_priv && 0) -#define IS_G33(dev_priv) (dev_priv && 0) -#define IS_IRONLAKE(dev_priv) (dev_priv && 0) -#define IS_IRONLAKE_M(dev_priv) (dev_priv && 0) -#define IS_SANDYBRIDGE(dev_priv) (dev_priv && 0) -#define IS_IVYBRIDGE(dev_priv) (dev_priv && 0) -#define IS_IVB_GT1(dev_priv) (dev_priv && 0) -#define IS_VALLEYVIEW(dev_priv) (dev_priv && 0) -#define IS_CHERRYVIEW(dev_priv) (dev_priv && 0) -#define IS_HASWELL(dev_priv) (dev_priv && 0) -#define IS_BROADWELL(dev_priv) (dev_priv && 0) -#define IS_SKYLAKE(dev_priv) (dev_priv && 0) -#define IS_BROXTON(dev_priv) (dev_priv && 0) -#define IS_KABYLAKE(dev_priv) (dev_priv && 0) -#define IS_GEMINILAKE(dev_priv) (dev_priv && 0) -#define IS_COFFEELAKE(dev_priv) (dev_priv && 0) -#define IS_COMETLAKE(dev_priv) (dev_priv && 0) -#define IS_ICELAKE(dev_priv) (dev_priv && 0) -#define IS_JASPERLAKE(dev_priv) (dev_priv && 0) -#define IS_ELKHARTLAKE(dev_priv) (dev_priv && 0) -#define IS_TIGERLAKE(dev_priv) 
IS_PLATFORM(dev_priv, XE_TIGERLAKE) -#define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_ROCKETLAKE) -#define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, XE_DG1) -#define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S) -#define IS_ALDERLAKE_P(dev_priv) (IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) || \ - IS_PLATFORM(dev_priv, XE_ALDERLAKE_N)) -#define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2) -#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE) -#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE) -#define IS_BATTLEMAGE(dev_priv) IS_PLATFORM(dev_priv, XE_BATTLEMAGE) - -#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) -#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) -#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0) - -#define IP_VER(ver, rel) ((ver) << 8 | (rel)) - -#define IS_MOBILE(xe) (xe && 0) - -#define IS_LP(xe) ((xe) && 0) -#define IS_GEN9_LP(xe) ((xe) && 0) -#define IS_GEN9_BC(xe) ((xe) && 0) - -#define IS_TIGERLAKE_UY(xe) (xe && 0) -#define IS_COMETLAKE_ULX(xe) (xe && 0) -#define IS_COFFEELAKE_ULX(xe) (xe && 0) -#define IS_KABYLAKE_ULX(xe) (xe && 0) -#define IS_SKYLAKE_ULX(xe) (xe && 0) -#define IS_HASWELL_ULX(xe) (xe && 0) -#define IS_COMETLAKE_ULT(xe) (xe && 0) -#define IS_COFFEELAKE_ULT(xe) (xe && 0) -#define IS_KABYLAKE_ULT(xe) (xe && 0) -#define IS_SKYLAKE_ULT(xe) (xe && 0) - -#define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10) -#define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11) -#define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12) -#define IS_RAPTORLAKE_U(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) -#define IS_ICL_WITH_PORT_F(xe) (xe && 0) -#define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe)) - -#define HAS_128_BYTE_Y_TILING(xe) (xe || 1) - -#define I915_PRIORITY_DISPLAY 0 -struct i915_sched_attr { - int priority; -}; -#define i915_gem_fence_wait_priority(fence, attr) do { (void) attr; } while (0) - -#define 
FORCEWAKE_ALL XE_FORCEWAKE_ALL - -#ifdef CONFIG_ARM64 -/* - * arm64 indirectly includes linux/rtc.h, - * which defines a irq_lock, so include it - * here before #define-ing it - */ -#include <linux/rtc.h> -#endif - -#define irq_lock irq.lock - -#endif diff --git a/rr-cache/96c1a0fb8618e58ea8a7af4351f2c7afbd4142f8/preimage b/rr-cache/96c1a0fb8618e58ea8a7af4351f2c7afbd4142f8/preimage deleted file mode 100644 index 6c9ed3e5557c..000000000000 --- a/rr-cache/96c1a0fb8618e58ea8a7af4351f2c7afbd4142f8/preimage +++ /dev/null @@ -1,132 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ -#ifndef _XE_I915_DRV_H_ -#define _XE_I915_DRV_H_ - -/* - * "Adaptation header" to allow i915 display to also build for xe driver. - * TODO: refactor i915 and xe so this can cease to exist - */ - -#include <drm/drm_drv.h> - -#include "i915_utils.h" -#include "intel_runtime_pm.h" -#include "xe_device_types.h" - -static inline struct drm_i915_private *to_i915(const struct drm_device *dev) -{ - return container_of(dev, struct drm_i915_private, drm); -} - -#define IS_PLATFORM(xe, x) ((xe)->info.platform == x) -#define INTEL_INFO(dev_priv) (&((dev_priv)->info)) -#define IS_I830(dev_priv) (dev_priv && 0) -#define IS_I845G(dev_priv) (dev_priv && 0) -#define IS_I85X(dev_priv) (dev_priv && 0) -#define IS_I865G(dev_priv) (dev_priv && 0) -#define IS_I915G(dev_priv) (dev_priv && 0) -#define IS_I915GM(dev_priv) (dev_priv && 0) -#define IS_I945G(dev_priv) (dev_priv && 0) -#define IS_I945GM(dev_priv) (dev_priv && 0) -#define IS_I965G(dev_priv) (dev_priv && 0) -#define IS_I965GM(dev_priv) (dev_priv && 0) -#define IS_G45(dev_priv) (dev_priv && 0) -#define IS_GM45(dev_priv) (dev_priv && 0) -#define IS_G4X(dev_priv) (dev_priv && 0) -#define IS_PINEVIEW(dev_priv) (dev_priv && 0) -#define IS_G33(dev_priv) (dev_priv && 0) -#define IS_IRONLAKE(dev_priv) (dev_priv && 0) -#define IS_IRONLAKE_M(dev_priv) (dev_priv && 0) -#define IS_SANDYBRIDGE(dev_priv) (dev_priv && 0) -#define 
IS_IVYBRIDGE(dev_priv) (dev_priv && 0) -#define IS_IVB_GT1(dev_priv) (dev_priv && 0) -#define IS_VALLEYVIEW(dev_priv) (dev_priv && 0) -#define IS_CHERRYVIEW(dev_priv) (dev_priv && 0) -#define IS_HASWELL(dev_priv) (dev_priv && 0) -#define IS_BROADWELL(dev_priv) (dev_priv && 0) -#define IS_SKYLAKE(dev_priv) (dev_priv && 0) -#define IS_BROXTON(dev_priv) (dev_priv && 0) -#define IS_KABYLAKE(dev_priv) (dev_priv && 0) -#define IS_GEMINILAKE(dev_priv) (dev_priv && 0) -#define IS_COFFEELAKE(dev_priv) (dev_priv && 0) -#define IS_COMETLAKE(dev_priv) (dev_priv && 0) -#define IS_ICELAKE(dev_priv) (dev_priv && 0) -#define IS_JASPERLAKE(dev_priv) (dev_priv && 0) -#define IS_ELKHARTLAKE(dev_priv) (dev_priv && 0) -#define IS_TIGERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_TIGERLAKE) -#define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_ROCKETLAKE) -#define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, XE_DG1) -#define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S) -#define IS_ALDERLAKE_P(dev_priv) (IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) || \ - IS_PLATFORM(dev_priv, XE_ALDERLAKE_N)) -#define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2) -#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE) -#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE) -#define IS_BATTLEMAGE(dev_priv) IS_PLATFORM(dev_priv, XE_BATTLEMAGE) - -#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0) -#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0) -#define IS_BROADWELL_ULX(dev_priv) (dev_priv && 0) - -#define IP_VER(ver, rel) ((ver) << 8 | (rel)) - -#define IS_MOBILE(xe) (xe && 0) - -<<<<<<< -#define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270 - -/* Workarounds not handled yet */ -#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step < last; }) - -#define IS_LP(xe) (0) -#define IS_GEN9_LP(xe) (0) -#define IS_GEN9_BC(xe) (0) -======= -#define IS_LP(xe) ((xe) && 0) -#define IS_GEN9_LP(xe) ((xe) && 0) -#define IS_GEN9_BC(xe) 
((xe) && 0) ->>>>>>> - -#define IS_TIGERLAKE_UY(xe) (xe && 0) -#define IS_COMETLAKE_ULX(xe) (xe && 0) -#define IS_COFFEELAKE_ULX(xe) (xe && 0) -#define IS_KABYLAKE_ULX(xe) (xe && 0) -#define IS_SKYLAKE_ULX(xe) (xe && 0) -#define IS_HASWELL_ULX(xe) (xe && 0) -#define IS_COMETLAKE_ULT(xe) (xe && 0) -#define IS_COFFEELAKE_ULT(xe) (xe && 0) -#define IS_KABYLAKE_ULT(xe) (xe && 0) -#define IS_SKYLAKE_ULT(xe) (xe && 0) - -#define IS_DG2_G10(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G10) -#define IS_DG2_G11(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G11) -#define IS_DG2_G12(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_DG2_G12) -#define IS_RAPTORLAKE_U(xe) ((xe)->info.subplatform == XE_SUBPLATFORM_ALDERLAKE_P_RPLU) -#define IS_ICL_WITH_PORT_F(xe) (xe && 0) -#define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe)) - -#define HAS_128_BYTE_Y_TILING(xe) (xe || 1) - -#define I915_PRIORITY_DISPLAY 0 -struct i915_sched_attr { - int priority; -}; -#define i915_gem_fence_wait_priority(fence, attr) do { (void) attr; } while (0) - -#define FORCEWAKE_ALL XE_FORCEWAKE_ALL - -#ifdef CONFIG_ARM64 -/* - * arm64 indirectly includes linux/rtc.h, - * which defines a irq_lock, so include it - * here before #define-ing it - */ -#include <linux/rtc.h> -#endif - -#define irq_lock irq.lock - -#endif diff --git a/rr-cache/e7c9aafc2297a37f89715cfeed48ccbfb82f76bb/preimage b/rr-cache/e7c9aafc2297a37f89715cfeed48ccbfb82f76bb/preimage deleted file mode 100644 index 110e70f7ee7b..000000000000 --- a/rr-cache/e7c9aafc2297a37f89715cfeed48ccbfb82f76bb/preimage +++ /dev/null @@ -1,2244 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_guc_submit.h" - -#include <linux/bitfield.h> -#include <linux/bitmap.h> -#include <linux/circ_buf.h> -#include <linux/delay.h> -#include <linux/dma-fence-array.h> -#include <linux/math64.h> - -#include <drm/drm_managed.h> - -#include "abi/guc_actions_abi.h" -#include "abi/guc_klvs_abi.h" -#include 
"regs/xe_lrc_layout.h" -#include "xe_assert.h" -#include "xe_devcoredump.h" -#include "xe_device.h" -#include "xe_exec_queue.h" -#include "xe_force_wake.h" -#include "xe_gpu_scheduler.h" -#include "xe_gt.h" -#include "xe_gt_clock.h" -#include "xe_gt_printk.h" -#include "xe_guc.h" -#include "xe_guc_ct.h" -#include "xe_guc_exec_queue_types.h" -#include "xe_guc_id_mgr.h" -#include "xe_guc_submit_types.h" -#include "xe_hw_engine.h" -#include "xe_hw_fence.h" -#include "xe_lrc.h" -#include "xe_macros.h" -#include "xe_map.h" -#include "xe_mocs.h" -#include "xe_pm.h" -#include "xe_ring_ops_types.h" -#include "xe_sched_job.h" -#include "xe_trace.h" -#include "xe_vm.h" - -static struct xe_guc * -exec_queue_to_guc(struct xe_exec_queue *q) -{ - return &q->gt->uc.guc; -} - -/* - * Helpers for engine state, using an atomic as some of the bits can transition - * as the same time (e.g. a suspend can be happning at the same time as schedule - * engine done being processed). - */ -#define EXEC_QUEUE_STATE_REGISTERED (1 << 0) -#define EXEC_QUEUE_STATE_ENABLED (1 << 1) -#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) -#define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) -#define EXEC_QUEUE_STATE_DESTROYED (1 << 4) -#define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) -#define EXEC_QUEUE_STATE_RESET (1 << 6) -#define EXEC_QUEUE_STATE_KILLED (1 << 7) -#define EXEC_QUEUE_STATE_WEDGED (1 << 8) -#define EXEC_QUEUE_STATE_BANNED (1 << 9) -#define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10) -#define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11) - -static bool exec_queue_registered(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED; -} - -static void set_exec_queue_registered(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); -} - -static void clear_exec_queue_registered(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); -} - -static bool exec_queue_enabled(struct xe_exec_queue *q) -{ - return 
atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED; -} - -static void set_exec_queue_enabled(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state); -} - -static void clear_exec_queue_enabled(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state); -} - -static bool exec_queue_pending_enable(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE; -} - -static void set_exec_queue_pending_enable(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); -} - -static void clear_exec_queue_pending_enable(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); -} - -static bool exec_queue_pending_disable(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE; -} - -static void set_exec_queue_pending_disable(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); -} - -static void clear_exec_queue_pending_disable(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); -} - -static bool exec_queue_destroyed(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; -} - -static void set_exec_queue_destroyed(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); -} - -static bool exec_queue_banned(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; -} - -static void set_exec_queue_banned(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); -} - -static bool exec_queue_suspended(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; -} - -static void set_exec_queue_suspended(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); -} - -static void clear_exec_queue_suspended(struct 
xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); -} - -static bool exec_queue_reset(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; -} - -static void set_exec_queue_reset(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); -} - -static bool exec_queue_killed(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; -} - -static void set_exec_queue_killed(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); -} - -static bool exec_queue_wedged(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED; -} - -static void set_exec_queue_wedged(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); -} - -static bool exec_queue_check_timeout(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT; -} - -static void set_exec_queue_check_timeout(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); -} - -static void clear_exec_queue_check_timeout(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); -} - -static bool exec_queue_extra_ref(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF; -} - -static void set_exec_queue_extra_ref(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); -} - -static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) -{ - return (atomic_read(&q->guc->state) & - (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED | - EXEC_QUEUE_STATE_BANNED)); -} - -#ifdef CONFIG_PROVE_LOCKING -static int alloc_submit_wq(struct xe_guc *guc) -{ - int i; - - for (i = 0; i < NUM_SUBMIT_WQ; ++i) { - guc->submission_state.submit_wq_pool[i] = - alloc_ordered_workqueue("submit_wq", 0); - if (!guc->submission_state.submit_wq_pool[i]) - goto 
err_free; - } - - return 0; - -err_free: - while (i) - destroy_workqueue(guc->submission_state.submit_wq_pool[--i]); - - return -ENOMEM; -} - -static void free_submit_wq(struct xe_guc *guc) -{ - int i; - - for (i = 0; i < NUM_SUBMIT_WQ; ++i) - destroy_workqueue(guc->submission_state.submit_wq_pool[i]); -} - -static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) -{ - int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ; - - return guc->submission_state.submit_wq_pool[idx]; -} -#else -static int alloc_submit_wq(struct xe_guc *guc) -{ - return 0; -} - -static void free_submit_wq(struct xe_guc *guc) -{ - -} - -static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) -{ - return NULL; -} -#endif - -static void guc_submit_fini(struct drm_device *drm, void *arg) -{ - struct xe_guc *guc = arg; - - xa_destroy(&guc->submission_state.exec_queue_lookup); - free_submit_wq(guc); -} - -static void guc_submit_wedged_fini(void *arg) -{ - struct xe_guc *guc = arg; - struct xe_exec_queue *q; - unsigned long index; - - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (exec_queue_wedged(q)) - xe_exec_queue_put(q); -} - -static const struct xe_exec_queue_ops guc_exec_queue_ops; - -static void primelockdep(struct xe_guc *guc) -{ - if (!IS_ENABLED(CONFIG_LOCKDEP)) - return; - - fs_reclaim_acquire(GFP_KERNEL); - - mutex_lock(&guc->submission_state.lock); - mutex_unlock(&guc->submission_state.lock); - - fs_reclaim_release(GFP_KERNEL); -} - -/** - * xe_guc_submit_init() - Initialize GuC submission. - * @guc: the &xe_guc to initialize - * @num_ids: number of GuC context IDs to use - * - * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all - * GuC context IDs supported by the GuC firmware should be used for submission. - * - * Only VF drivers will have to provide explicit number of GuC context IDs - * that they can use for submission. - * - * Return: 0 on success or a negative error code on failure. 
- */ -int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_gt *gt = guc_to_gt(guc); - int err; - - err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock); - if (err) - return err; - - err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids); - if (err) - return err; - - err = alloc_submit_wq(guc); - if (err) - return err; - - gt->exec_queue_ops = &guc_exec_queue_ops; - - xa_init(&guc->submission_state.exec_queue_lookup); - - primelockdep(guc); - - return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); -} - -static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) -{ - int i; - - lockdep_assert_held(&guc->submission_state.lock); - - for (i = 0; i < xa_count; ++i) - xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i); - - xe_guc_id_mgr_release_locked(&guc->submission_state.idm, - q->guc->id, q->width); -} - -static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) -{ - int ret; - void *ptr; - int i; - - /* - * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, - * worse case user gets -ENOMEM on engine create and has to try again. - * - * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent - * failure. 
- */ - lockdep_assert_held(&guc->submission_state.lock); - - ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm, - q->width); - if (ret < 0) - return ret; - - q->guc->id = ret; - - for (i = 0; i < q->width; ++i) { - ptr = xa_store(&guc->submission_state.exec_queue_lookup, - q->guc->id + i, q, GFP_NOWAIT); - if (IS_ERR(ptr)) { - ret = PTR_ERR(ptr); - goto err_release; - } - } - - return 0; - -err_release: - __release_guc_id(guc, q, i); - - return ret; -} - -static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) -{ - mutex_lock(&guc->submission_state.lock); - __release_guc_id(guc, q, q->width); - mutex_unlock(&guc->submission_state.lock); -} - -struct exec_queue_policy { - u32 count; - struct guc_update_exec_queue_policy h2g; -}; - -static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) -{ - size_t bytes = sizeof(policy->h2g.header) + - (sizeof(policy->h2g.klv[0]) * policy->count); - - return bytes / sizeof(u32); -} - -static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, - u16 guc_id) -{ - policy->h2g.header.action = - XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; - policy->h2g.header.guc_id = guc_id; - policy->count = 0; -} - -#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ -static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ - u32 data) \ -{ \ - XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ -\ - policy->h2g.klv[policy->count].kl = \ - FIELD_PREP(GUC_KLV_0_KEY, \ - GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ - FIELD_PREP(GUC_KLV_0_LEN, 1); \ - policy->h2g.klv[policy->count].value = data; \ - policy->count++; \ -} - -MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) -MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) -MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) -#undef MAKE_EXEC_QUEUE_POLICY_ADD - -static const int xe_exec_queue_prio_to_guc[] = { - [XE_EXEC_QUEUE_PRIORITY_LOW] = 
GUC_CLIENT_PRIORITY_NORMAL, - [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, - [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, - [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, -}; - -static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) -{ - struct exec_queue_policy policy; - struct xe_device *xe = guc_to_xe(guc); - enum xe_exec_queue_priority prio = q->sched_props.priority; - u32 timeslice_us = q->sched_props.timeslice_us; - u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; - - xe_assert(xe, exec_queue_registered(q)); - - __guc_exec_queue_policy_start_klv(&policy, q->guc->id); - __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); - __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); - __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); - - xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, - __guc_exec_queue_policy_action_size(&policy), 0, 0); -} - -static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q) -{ - struct exec_queue_policy policy; - - __guc_exec_queue_policy_start_klv(&policy, q->guc->id); - __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); - - xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, - __guc_exec_queue_policy_action_size(&policy), 0, 0); -} - -#define parallel_read(xe_, map_, field_) \ - xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ - field_) -#define parallel_write(xe_, map_, field_, val_) \ - xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ - field_, val_) - -static void __register_mlrc_exec_queue(struct xe_guc *guc, - struct xe_exec_queue *q, - struct guc_ctxt_registration_info *info) -{ -#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) - struct xe_device *xe = guc_to_xe(guc); - u32 action[MAX_MLRC_REG_SIZE]; - int len = 0; - int i; - - xe_assert(xe, xe_exec_queue_is_parallel(q)); - - action[len++] = 
XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; - action[len++] = info->flags; - action[len++] = info->context_idx; - action[len++] = info->engine_class; - action[len++] = info->engine_submit_mask; - action[len++] = info->wq_desc_lo; - action[len++] = info->wq_desc_hi; - action[len++] = info->wq_base_lo; - action[len++] = info->wq_base_hi; - action[len++] = info->wq_size; - action[len++] = q->width; - action[len++] = info->hwlrca_lo; - action[len++] = info->hwlrca_hi; - - for (i = 1; i < q->width; ++i) { - struct xe_lrc *lrc = q->lrc[i]; - - action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); - action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); - } - - xe_assert(xe, len <= MAX_MLRC_REG_SIZE); -#undef MAX_MLRC_REG_SIZE - - xe_guc_ct_send(&guc->ct, action, len, 0, 0); -} - -static void __register_exec_queue(struct xe_guc *guc, - struct guc_ctxt_registration_info *info) -{ - u32 action[] = { - XE_GUC_ACTION_REGISTER_CONTEXT, - info->flags, - info->context_idx, - info->engine_class, - info->engine_submit_mask, - info->wq_desc_lo, - info->wq_desc_hi, - info->wq_base_lo, - info->wq_base_hi, - info->wq_size, - info->hwlrca_lo, - info->hwlrca_hi, - }; - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); -} - -static void register_exec_queue(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct xe_lrc *lrc = q->lrc[0]; - struct guc_ctxt_registration_info info; - - xe_assert(xe, !exec_queue_registered(q)); - - memset(&info, 0, sizeof(info)); - info.context_idx = q->guc->id; - info.engine_class = xe_engine_class_to_guc_class(q->class); - info.engine_submit_mask = q->logical_mask; - info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); - info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); - info.flags = CONTEXT_REGISTRATION_FLAG_KMD; - - if (xe_exec_queue_is_parallel(q)) { - u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); - struct iosys_map map = xe_lrc_parallel_map(lrc); - - info.wq_desc_lo 
= lower_32_bits(ggtt_addr + - offsetof(struct guc_submit_parallel_scratch, wq_desc)); - info.wq_desc_hi = upper_32_bits(ggtt_addr + - offsetof(struct guc_submit_parallel_scratch, wq_desc)); - info.wq_base_lo = lower_32_bits(ggtt_addr + - offsetof(struct guc_submit_parallel_scratch, wq[0])); - info.wq_base_hi = upper_32_bits(ggtt_addr + - offsetof(struct guc_submit_parallel_scratch, wq[0])); - info.wq_size = WQ_SIZE; - - q->guc->wqi_head = 0; - q->guc->wqi_tail = 0; - xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); - parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); - } - - /* - * We must keep a reference for LR engines if engine is registered with - * the GuC as jobs signal immediately and can't destroy an engine if the - * GuC has a reference to it. - */ - if (xe_exec_queue_is_lr(q)) - xe_exec_queue_get(q); - - set_exec_queue_registered(q); - trace_xe_exec_queue_register(q); - if (xe_exec_queue_is_parallel(q)) - __register_mlrc_exec_queue(guc, q, &info); - else - __register_exec_queue(guc, &info); - init_policies(guc, q); -} - -static u32 wq_space_until_wrap(struct xe_exec_queue *q) -{ - return (WQ_SIZE - q->guc->wqi_tail); -} - -static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); - unsigned int sleep_period_ms = 1; - -#define AVAILABLE_SPACE \ - CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) - if (wqi_size > AVAILABLE_SPACE) { -try_again: - q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); - if (wqi_size > AVAILABLE_SPACE) { - if (sleep_period_ms == 1024) { - xe_gt_reset_async(q->gt); - return -ENODEV; - } - - msleep(sleep_period_ms); - sleep_period_ms <<= 1; - goto try_again; - } - } -#undef AVAILABLE_SPACE - - return 0; -} - -static int wq_noop_append(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = 
guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); - u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1; - - if (wq_wait_for_space(q, wq_space_until_wrap(q))) - return -ENODEV; - - xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); - - parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], - FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | - FIELD_PREP(WQ_LEN_MASK, len_dw)); - q->guc->wqi_tail = 0; - - return 0; -} - -static void wq_item_append(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); -#define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ - u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; - u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); - u32 len_dw = (wqi_size / sizeof(u32)) - 1; - int i = 0, j; - - if (wqi_size > wq_space_until_wrap(q)) { - if (wq_noop_append(q)) - return; - } - if (wq_wait_for_space(q, wqi_size)) - return; - - wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | - FIELD_PREP(WQ_LEN_MASK, len_dw); - wqi[i++] = xe_lrc_descriptor(q->lrc[0]); - wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | - FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64)); - wqi[i++] = 0; - for (j = 1; j < q->width; ++j) { - struct xe_lrc *lrc = q->lrc[j]; - - wqi[i++] = lrc->ring.tail / sizeof(u64); - } - - xe_assert(xe, i == wqi_size / sizeof(u32)); - - iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, - wq[q->guc->wqi_tail / sizeof(u32)])); - xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); - q->guc->wqi_tail += wqi_size; - xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); - - xe_device_wmb(xe); - - map = xe_lrc_parallel_map(q->lrc[0]); - parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); -} - -#define RESUME_PENDING ~0x0ull -static void submit_exec_queue(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = 
guc_to_xe(guc); - struct xe_lrc *lrc = q->lrc[0]; - u32 action[3]; - u32 g2h_len = 0; - u32 num_g2h = 0; - int len = 0; - bool extra_submit = false; - - xe_assert(xe, exec_queue_registered(q)); - - if (xe_exec_queue_is_parallel(q)) - wq_item_append(q); - else - xe_lrc_set_ring_tail(lrc, lrc->ring.tail); - - if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) - return; - - if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { - action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; - action[len++] = q->guc->id; - action[len++] = GUC_CONTEXT_ENABLE; - g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; - num_g2h = 1; - if (xe_exec_queue_is_parallel(q)) - extra_submit = true; - - q->guc->resume_time = RESUME_PENDING; - set_exec_queue_pending_enable(q); - set_exec_queue_enabled(q); - trace_xe_exec_queue_scheduling_enable(q); - } else { - action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; - action[len++] = q->guc->id; - trace_xe_exec_queue_submit(q); - } - - xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); - - if (extra_submit) { - len = 0; - action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; - action[len++] = q->guc->id; - trace_xe_exec_queue_submit(q); - - xe_guc_ct_send(&guc->ct, action, len, 0, 0); - } -} - -static struct dma_fence * -guc_exec_queue_run_job(struct drm_sched_job *drm_job) -{ - struct xe_sched_job *job = to_xe_sched_job(drm_job); - struct xe_exec_queue *q = job->q; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - bool lr = xe_exec_queue_is_lr(q); - - xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || - exec_queue_banned(q) || exec_queue_suspended(q)); - - trace_xe_sched_job_run(job); - - if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) { - if (!exec_queue_registered(q)) - register_exec_queue(q); - if (!lr) /* LR jobs are emitted in the exec IOCTL */ - q->ring_ops->emit_job(job); - submit_exec_queue(q); - } - - if (lr) { - xe_sched_job_set_error(job, 
-EOPNOTSUPP); - return NULL; - } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) { - return job->fence; - } else { - return dma_fence_get(job->fence); - } -} - -static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) -{ - struct xe_sched_job *job = to_xe_sched_job(drm_job); - - xe_exec_queue_update_run_ticks(job->q); - - trace_xe_sched_job_free(job); - xe_sched_job_put(job); -} - -static int guc_read_stopped(struct xe_guc *guc) -{ - return atomic_read(&guc->submission_state.stopped); -} - -#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ - u32 action[] = { \ - XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ - q->guc->id, \ - GUC_CONTEXT_##enable_disable, \ - } - -static void disable_scheduling_deregister(struct xe_guc *guc, - struct xe_exec_queue *q) -{ - MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); - struct xe_device *xe = guc_to_xe(guc); - int ret; - - set_min_preemption_timeout(guc, q); - smp_rmb(); - ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret) { - struct xe_gpu_scheduler *sched = &q->guc->sched; - - drm_warn(&xe->drm, "Pending enable failed to respond"); - xe_sched_submission_start(sched); - xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(sched); - return; - } - - clear_exec_queue_enabled(q); - set_exec_queue_pending_disable(q); - set_exec_queue_destroyed(q); - trace_xe_exec_queue_scheduling_disable(q); - - /* - * Reserve space for both G2H here as the 2nd G2H is sent from a G2H - * handler and we are not allowed to reserved G2H space in handlers. 
- */ - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + - G2H_LEN_DW_DEREGISTER_CONTEXT, 2); -} - -static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - /** to wakeup xe_wait_user_fence ioctl if exec queue is reset */ - wake_up_all(&xe->ufence_wq); - - if (xe_exec_queue_is_lr(q)) - queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr); - else - xe_sched_tdr_queue_imm(&q->guc->sched); -} - -/** - * xe_guc_submit_wedge() - Wedge GuC submission - * @guc: the GuC object - * - * Save exec queue's registered with GuC state by taking a ref to each queue. - * Register a DRMM handler to drop refs upon driver unload. - */ -void xe_guc_submit_wedge(struct xe_guc *guc) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - unsigned long index; - int err; - - xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); - - err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, - guc_submit_wedged_fini, guc); - if (err) { - drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. 
Although device is wedged.\n"); - return; - } - - mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (xe_exec_queue_get_unless_zero(q)) - set_exec_queue_wedged(q); - mutex_unlock(&guc->submission_state.lock); -} - -static bool guc_submit_hint_wedged(struct xe_guc *guc) -{ - struct xe_device *xe = guc_to_xe(guc); - - if (xe->wedged.mode != 2) - return false; - - if (xe_device_wedged(xe)) - return true; - - xe_device_declare_wedged(xe); - - return true; -} - -static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) -{ - struct xe_guc_exec_queue *ge = - container_of(w, struct xe_guc_exec_queue, lr_tdr); - struct xe_exec_queue *q = ge->q; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; - - xe_assert(xe, xe_exec_queue_is_lr(q)); - trace_xe_exec_queue_lr_cleanup(q); - - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); - - /* Kill the run_job / process_msg entry points */ - xe_sched_submission_stop(sched); - - /* - * Engine state now mostly stable, disable scheduling / deregister if - * needed. This cleanup routine might be called multiple times, where - * the actual async engine deregister drops the final engine ref. - * Calling disable_scheduling_deregister will mark the engine as - * destroyed and fire off the CT requests to disable scheduling / - * deregister, which we only want to do once. We also don't want to mark - * the engine as pending_disable again as this may race with the - * xe_guc_deregister_done_handler() which treats it as an unexpected - * state. - */ - if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { - struct xe_guc *guc = exec_queue_to_guc(q); - int ret; - - set_exec_queue_banned(q); - disable_scheduling_deregister(guc, q); - - /* - * Must wait for scheduling to be disabled before signalling - * any fences, if GT broken the GT reset code should signal us. 
- */ - ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_disable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret) { - drm_warn(&xe->drm, "Schedule disable failed to respond"); - xe_sched_submission_start(sched); - xe_gt_reset_async(q->gt); - return; - } - } - - xe_sched_submission_start(sched); -} - -#define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100) - -static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) -{ - struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); - u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); - u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); - u32 timeout_ms = q->sched_props.job_timeout_ms; - u32 diff; - u64 running_time_ms; - - /* - * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch - * possible overflows with a high timeout. - */ - xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); - - if (ctx_timestamp < ctx_job_timestamp) - diff = ctx_timestamp + U32_MAX - ctx_job_timestamp; - else - diff = ctx_timestamp - ctx_job_timestamp; - - /* - * Ensure timeout is within 5% to account for an GuC scheduling latency - */ - running_time_ms = - ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff)); - - xe_gt_dbg(gt, - "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, running_time_ms, timeout_ms, diff); - - return running_time_ms >= timeout_ms; -} - -static void enable_scheduling(struct xe_exec_queue *q) -{ - MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); - struct xe_guc *guc = exec_queue_to_guc(q); - int ret; - - xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); - - set_exec_queue_pending_enable(q); - set_exec_queue_enabled(q); - trace_xe_exec_queue_scheduling_enable(q); - 
- xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); - - ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_enable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret || guc_read_stopped(guc)) { - xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); - set_exec_queue_banned(q); - xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(&q->guc->sched); - } -} - -static void disable_scheduling(struct xe_exec_queue *q, bool immediate) -{ - MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); - struct xe_guc *guc = exec_queue_to_guc(q); - - xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - - if (immediate) - set_min_preemption_timeout(guc, q); - clear_exec_queue_enabled(q); - set_exec_queue_pending_disable(q); - trace_xe_exec_queue_scheduling_disable(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); -} - -static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) -{ - u32 action[] = { - XE_GUC_ACTION_DEREGISTER_CONTEXT, - q->guc->id, - }; - - xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - - set_exec_queue_destroyed(q); - trace_xe_exec_queue_deregister(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_DEREGISTER_CONTEXT, 1); -} - -static enum drm_gpu_sched_stat -guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) -{ - struct xe_sched_job *job = to_xe_sched_job(drm_job); - struct xe_sched_job *tmp_job; - struct xe_exec_queue *q = job->q; - struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_guc *guc = exec_queue_to_guc(q); -<<<<<<< -======= - const char *process_name = "no process"; ->>>>>>> - int 
err = -ETIME; - int i = 0; - bool wedged, skip_timeout_check; - - /* - * TDR has fired before free job worker. Common if exec queue - * immediately closed after last fence signaled. - */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - guc_exec_queue_free_job(drm_job); - - return DRM_GPU_SCHED_STAT_NOMINAL; - } - - /* Kill the run_job entry point */ - xe_sched_submission_stop(sched); - - /* Must check all state after stopping scheduler */ - skip_timeout_check = exec_queue_reset(q) || - exec_queue_killed_or_banned_or_wedged(q) || - exec_queue_destroyed(q); - - /* Job hasn't started, can't be timed out */ - if (!skip_timeout_check && !xe_sched_job_started(job)) - goto rearm; - - /* - * XXX: Sampling timeout doesn't work in wedged mode as we have to - * modify scheduling state to read timestamp. We could read the - * timestamp from a register to accumulate current running time but this - * doesn't work for SRIOV. For now assuming timeouts in wedged mode are - * genuine timeouts. - */ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); - - /* Engine state now stable, disable scheduling to check timestamp */ - if (!wedged && exec_queue_registered(q)) { - int ret; - - if (exec_queue_reset(q)) - err = -EIO; - - if (!exec_queue_destroyed(q)) { - /* - * Wait for any pending G2H to flush out before - * modifying state - */ - ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_enable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret || guc_read_stopped(guc)) - goto trigger_reset; - - /* - * Flag communicates to G2H handler that schedule - * disable originated from a timeout check. The G2H then - * avoid triggering cleanup or deregistering the exec - * queue. - */ - set_exec_queue_check_timeout(q); - disable_scheduling(q, skip_timeout_check); - } - - /* - * Must wait for scheduling to be disabled before signalling - * any fences, if GT broken the GT reset code should signal us. 
- * - * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault - * error) messages which can cause the schedule disable to get - * lost. If this occurs, trigger a GT reset to recover. - */ - smp_rmb(); - ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_disable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret || guc_read_stopped(guc)) { -trigger_reset: - if (!ret) - xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond"); - set_exec_queue_extra_ref(q); - xe_exec_queue_get(q); /* GT reset owns this */ - set_exec_queue_banned(q); - xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(sched); - goto rearm; - } - } - - /* - * Check if job is actually timed out, if so restart job execution and TDR - */ - if (!wedged && !skip_timeout_check && !check_timeout(q, job) && - !exec_queue_reset(q) && exec_queue_registered(q)) { - clear_exec_queue_check_timeout(q); - goto sched_enable; - } - -<<<<<<< - if (q->vm && q->vm->xef) { - process_name = q->vm->xef->process_name; - pid = q->vm->xef->pid; - } - xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, q->flags, process_name, pid); - -======= - xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, q->flags); ->>>>>>> - trace_xe_sched_job_timedout(job); - - if (!exec_queue_killed(q)) - xe_devcoredump(job); - - /* - * Kernel jobs should never fail, nor should VM jobs if they do - * somethings has gone wrong and the GT needs a reset - */ - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, - "Kernel-submitted job timed out\n"); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), - "VM job timed out on non-killed execqueue\n"); - if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || - (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { - if 
(!xe_sched_invalidate_job(job, 2)) { - clear_exec_queue_check_timeout(q); - xe_gt_reset_async(q->gt); - goto rearm; - } - } - - /* Finish cleaning up exec queue via deregister */ - set_exec_queue_banned(q); - if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { - set_exec_queue_extra_ref(q); - xe_exec_queue_get(q); - __deregister_exec_queue(guc, q); - } - - /* Stop fence signaling */ - xe_hw_fence_irq_stop(q->fence_irq); - - /* - * Fence state now stable, stop / start scheduler which cleans up any - * fences that are complete - */ - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - - xe_guc_exec_queue_trigger_cleanup(q); - - /* Mark all outstanding jobs as bad, thus completing them */ - spin_lock(&sched->base.job_list_lock); - list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list) - xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED); - spin_unlock(&sched->base.job_list_lock); - - /* Start fence signaling */ - xe_hw_fence_irq_start(q->fence_irq); - - return DRM_GPU_SCHED_STAT_NOMINAL; - -sched_enable: - enable_scheduling(q); -rearm: - /* - * XXX: Ideally want to adjust timeout based on current exection time - * but there is not currently an easy way to do in DRM scheduler. With - * some thought, do this in a follow up. 
- */ - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_NOMINAL; -} - -static void __guc_exec_queue_fini_async(struct work_struct *w) -{ - struct xe_guc_exec_queue *ge = - container_of(w, struct xe_guc_exec_queue, fini_async); - struct xe_exec_queue *q = ge->q; - struct xe_guc *guc = exec_queue_to_guc(q); - - xe_pm_runtime_get(guc_to_xe(guc)); - trace_xe_exec_queue_destroy(q); - - if (xe_exec_queue_is_lr(q)) - cancel_work_sync(&ge->lr_tdr); - release_guc_id(guc, q); - xe_sched_entity_fini(&ge->entity); - xe_sched_fini(&ge->sched); - - kfree(ge); - xe_exec_queue_fini(q); - xe_pm_runtime_put(guc_to_xe(guc)); -} - -static void guc_exec_queue_fini_async(struct xe_exec_queue *q) -{ - INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); - - /* We must block on kernel engines so slabs are empty on driver unload */ - if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) - __guc_exec_queue_fini_async(&q->guc->fini_async); - else - queue_work(system_wq, &q->guc->fini_async); -} - -static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) -{ - /* - * Might be done from within the GPU scheduler, need to do async as we - * fini the scheduler when the engine is fini'd, the scheduler can't - * complete fini within itself (circular dependency). Async resolves - * this we and don't really care when everything is fini'd, just that it - * is. 
- */ - guc_exec_queue_fini_async(q); -} - -static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) -{ - struct xe_exec_queue *q = msg->private_data; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); - trace_xe_exec_queue_cleanup_entity(q); - - if (exec_queue_registered(q)) - disable_scheduling_deregister(guc, q); - else - __guc_exec_queue_fini(guc, q); -} - -static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) -{ - return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); -} - -static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) -{ - struct xe_exec_queue *q = msg->private_data; - struct xe_guc *guc = exec_queue_to_guc(q); - - if (guc_exec_queue_allowed_to_change_state(q)) - init_policies(guc, q); - kfree(msg); -} - -static void suspend_fence_signal(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) || - guc_read_stopped(guc)); - xe_assert(xe, q->guc->suspend_pending); - - q->guc->suspend_pending = false; - smp_wmb(); - wake_up(&q->guc->suspend_wait); -} - -static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) -{ - struct xe_exec_queue *q = msg->private_data; - struct xe_guc *guc = exec_queue_to_guc(q); - - if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && - exec_queue_enabled(q)) { - wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING || - guc_read_stopped(guc)); - - if (!guc_read_stopped(guc)) { - s64 since_resume_ms = - ktime_ms_delta(ktime_get(), - q->guc->resume_time); - s64 wait_ms = q->vm->preempt.min_run_period_ms - - since_resume_ms; - - if (wait_ms > 0 && q->guc->resume_time) - msleep(wait_ms); - - set_exec_queue_suspended(q); - disable_scheduling(q, false); - } - } else if 
(q->guc->suspend_pending) { - set_exec_queue_suspended(q); - suspend_fence_signal(q); - } -} - -static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) -{ - struct xe_exec_queue *q = msg->private_data; - - if (guc_exec_queue_allowed_to_change_state(q)) { - q->guc->resume_time = RESUME_PENDING; - clear_exec_queue_suspended(q); - enable_scheduling(q); - } else { - clear_exec_queue_suspended(q); - } -} - -#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ -#define SET_SCHED_PROPS 2 -#define SUSPEND 3 -#define RESUME 4 - -static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) -{ - trace_xe_sched_msg_recv(msg); - - switch (msg->opcode) { - case CLEANUP: - __guc_exec_queue_process_msg_cleanup(msg); - break; - case SET_SCHED_PROPS: - __guc_exec_queue_process_msg_set_sched_props(msg); - break; - case SUSPEND: - __guc_exec_queue_process_msg_suspend(msg); - break; - case RESUME: - __guc_exec_queue_process_msg_resume(msg); - break; - default: - XE_WARN_ON("Unknown message type"); - } - - xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data))); -} - -static const struct drm_sched_backend_ops drm_sched_ops = { - .run_job = guc_exec_queue_run_job, - .free_job = guc_exec_queue_free_job, - .timedout_job = guc_exec_queue_timedout_job, -}; - -static const struct xe_sched_backend_ops xe_sched_ops = { - .process_msg = guc_exec_queue_process_msg, -}; - -static int guc_exec_queue_init(struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct xe_guc_exec_queue *ge; - long timeout; - int err; - - xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); - - ge = kzalloc(sizeof(*ge), GFP_KERNEL); - if (!ge) - return -ENOMEM; - - q->guc = ge; - ge->q = q; - init_waitqueue_head(&ge->suspend_wait); - - timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? 
MAX_SCHEDULE_TIMEOUT : - msecs_to_jiffies(q->sched_props.job_timeout_ms); - err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, - get_submit_wq(guc), - q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64, - timeout, guc_to_gt(guc)->ordered_wq, NULL, - q->name, gt_to_xe(q->gt)->drm.dev); - if (err) - goto err_free; - - sched = &ge->sched; - err = xe_sched_entity_init(&ge->entity, sched); - if (err) - goto err_sched; - - if (xe_exec_queue_is_lr(q)) - INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); - - mutex_lock(&guc->submission_state.lock); - - err = alloc_guc_id(guc, q); - if (err) - goto err_entity; - - q->entity = &ge->entity; - - if (guc_read_stopped(guc)) - xe_sched_stop(sched); - - mutex_unlock(&guc->submission_state.lock); - - xe_exec_queue_assign_name(q, q->guc->id); - - trace_xe_exec_queue_create(q); - - return 0; - -err_entity: - mutex_unlock(&guc->submission_state.lock); - xe_sched_entity_fini(&ge->entity); -err_sched: - xe_sched_fini(&ge->sched); -err_free: - kfree(ge); - - return err; -} - -static void guc_exec_queue_kill(struct xe_exec_queue *q) -{ - trace_xe_exec_queue_kill(q); - set_exec_queue_killed(q); - xe_guc_exec_queue_trigger_cleanup(q); -} - -static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, - u32 opcode) -{ - xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); - - INIT_LIST_HEAD(&msg->link); - msg->opcode = opcode; - msg->private_data = q; - - trace_xe_sched_msg_add(msg); - xe_sched_add_msg(&q->guc->sched, msg); -} - -#define STATIC_MSG_CLEANUP 0 -#define STATIC_MSG_SUSPEND 1 -#define STATIC_MSG_RESUME 2 -static void guc_exec_queue_fini(struct xe_exec_queue *q) -{ - struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; - - if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) - guc_exec_queue_add_msg(q, msg, CLEANUP); - else - __guc_exec_queue_fini(exec_queue_to_guc(q), q); -} - -static int guc_exec_queue_set_priority(struct xe_exec_queue *q, - enum 
xe_exec_queue_priority priority) -{ - struct xe_sched_msg *msg; - - if (q->sched_props.priority == priority || - exec_queue_killed_or_banned_or_wedged(q)) - return 0; - - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - return -ENOMEM; - - q->sched_props.priority = priority; - guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); - - return 0; -} - -static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) -{ - struct xe_sched_msg *msg; - - if (q->sched_props.timeslice_us == timeslice_us || - exec_queue_killed_or_banned_or_wedged(q)) - return 0; - - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - return -ENOMEM; - - q->sched_props.timeslice_us = timeslice_us; - guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); - - return 0; -} - -static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, - u32 preempt_timeout_us) -{ - struct xe_sched_msg *msg; - - if (q->sched_props.preempt_timeout_us == preempt_timeout_us || - exec_queue_killed_or_banned_or_wedged(q)) - return 0; - - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - return -ENOMEM; - - q->sched_props.preempt_timeout_us = preempt_timeout_us; - guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); - - return 0; -} - -static int guc_exec_queue_suspend(struct xe_exec_queue *q) -{ - struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; - - if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending) - return -EINVAL; - - q->guc->suspend_pending = true; - guc_exec_queue_add_msg(q, msg, SUSPEND); - - return 0; -} - -static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - - wait_event(q->guc->suspend_wait, !q->guc->suspend_pending || - guc_read_stopped(guc)); -} - -static void guc_exec_queue_resume(struct xe_exec_queue *q) -{ - struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - 
xe_assert(xe, !q->guc->suspend_pending); - - guc_exec_queue_add_msg(q, msg, RESUME); -} - -static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) -{ - return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); -} - -/* - * All of these functions are an abstraction layer which other parts of XE can - * use to trap into the GuC backend. All of these functions, aside from init, - * really shouldn't do much other than trap into the DRM scheduler which - * synchronizes these operations. - */ -static const struct xe_exec_queue_ops guc_exec_queue_ops = { - .init = guc_exec_queue_init, - .kill = guc_exec_queue_kill, - .fini = guc_exec_queue_fini, - .set_priority = guc_exec_queue_set_priority, - .set_timeslice = guc_exec_queue_set_timeslice, - .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, - .suspend = guc_exec_queue_suspend, - .suspend_wait = guc_exec_queue_suspend_wait, - .resume = guc_exec_queue_resume, - .reset_status = guc_exec_queue_reset_status, -}; - -static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched = &q->guc->sched; - - /* Stop scheduling + flush any DRM scheduler operations */ - xe_sched_submission_stop(sched); - - /* Clean up lost G2H + reset engine state */ - if (exec_queue_registered(q)) { - if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) - xe_exec_queue_put(q); - else if (exec_queue_destroyed(q)) - __guc_exec_queue_fini(guc, q); - } - if (q->guc->suspend_pending) { - set_exec_queue_suspended(q); - suspend_fence_signal(q); - } - atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED | - EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED | - EXEC_QUEUE_STATE_SUSPENDED, - &q->guc->state); - q->guc->resume_time = 0; - trace_xe_exec_queue_stop(q); - - /* - * Ban any engine (aside from kernel and engines used for VM ops) with a - * started but not complete job or if a job has gone through a GT reset - * more than twice. 
- */ - if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { - struct xe_sched_job *job = xe_sched_first_pending_job(sched); - bool ban = false; - - if (job) { - if ((xe_sched_job_started(job) && - !xe_sched_job_completed(job)) || - xe_sched_invalidate_job(job, 2)) { - trace_xe_sched_job_ban(job); - ban = true; - } - } else if (xe_exec_queue_is_lr(q) && - (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) { - ban = true; - } - - if (ban) { - set_exec_queue_banned(q); - xe_guc_exec_queue_trigger_cleanup(q); - } - } -} - -int xe_guc_submit_reset_prepare(struct xe_guc *guc) -{ - int ret; - - /* - * Using an atomic here rather than submission_state.lock as this - * function can be called while holding the CT lock (engine reset - * failure). submission_state.lock needs the CT lock to resubmit jobs. - * Atomic is not ideal, but it works to prevent against concurrent reset - * and releasing any TDRs waiting on guc->submission_state.stopped. - */ - ret = atomic_fetch_or(1, &guc->submission_state.stopped); - smp_wmb(); - wake_up_all(&guc->ct.wq); - - return ret; -} - -void xe_guc_submit_reset_wait(struct xe_guc *guc) -{ - wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || - !guc_read_stopped(guc)); -} - -void xe_guc_submit_stop(struct xe_guc *guc) -{ - struct xe_exec_queue *q; - unsigned long index; - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, guc_read_stopped(guc) == 1); - - mutex_lock(&guc->submission_state.lock); - - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - guc_exec_queue_stop(guc, q); - - mutex_unlock(&guc->submission_state.lock); - - /* - * No one can enter the backend at this point, aside from new engine - * creation which is protected by guc->submission_state.lock. 
- */ - -} - -static void guc_exec_queue_start(struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched = &q->guc->sched; - - if (!exec_queue_killed_or_banned_or_wedged(q)) { - int i; - - trace_xe_exec_queue_resubmit(q); - for (i = 0; i < q->width; ++i) - xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail); - xe_sched_resubmit_jobs(sched); - } - - xe_sched_submission_start(sched); -} - -int xe_guc_submit_start(struct xe_guc *guc) -{ - struct xe_exec_queue *q; - unsigned long index; - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, guc_read_stopped(guc) == 1); - - mutex_lock(&guc->submission_state.lock); - atomic_dec(&guc->submission_state.stopped); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - guc_exec_queue_start(q); - mutex_unlock(&guc->submission_state.lock); - - wake_up_all(&guc->ct.wq); - - return 0; -} - -static struct xe_exec_queue * -g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - - if (unlikely(guc_id >= GUC_ID_MAX)) { - drm_err(&xe->drm, "Invalid guc_id %u", guc_id); - return NULL; - } - - q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); - if (unlikely(!q)) { - drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); - return NULL; - } - - xe_assert(xe, guc_id >= q->guc->id); - xe_assert(xe, guc_id < (q->guc->id + q->width)); - - return q; -} - -static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) -{ - u32 action[] = { - XE_GUC_ACTION_DEREGISTER_CONTEXT, - q->guc->id, - }; - - xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q)); - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); - - trace_xe_exec_queue_deregister(q); - - xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); -} - -static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue 
*q, - u32 runnable_state) -{ - trace_xe_exec_queue_scheduling_done(q); - - if (runnable_state == 1) { - xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); - - q->guc->resume_time = ktime_get(); - clear_exec_queue_pending_enable(q); - smp_wmb(); - wake_up_all(&guc->ct.wq); - } else { - bool check_timeout = exec_queue_check_timeout(q); - - xe_gt_assert(guc_to_gt(guc), runnable_state == 0); - xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); - - clear_exec_queue_pending_disable(q); - if (q->guc->suspend_pending) { - suspend_fence_signal(q); - } else { - if (exec_queue_banned(q) || check_timeout) { - smp_wmb(); - wake_up_all(&guc->ct.wq); - } - if (!check_timeout) - deregister_exec_queue(guc, q); - } - } -} - -int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - u32 runnable_state = msg[1]; - - if (unlikely(len < 2)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - if (unlikely(!exec_queue_pending_enable(q) && - !exec_queue_pending_disable(q))) { - xe_gt_err(guc_to_gt(guc), - "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u", - atomic_read(&q->guc->state), q->guc->id, - runnable_state); - return -EPROTO; - } - - handle_sched_done(guc, q, runnable_state); - - return 0; -} - -static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) -{ - trace_xe_exec_queue_deregister_done(q); - - clear_exec_queue_registered(q); - - if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) - xe_exec_queue_put(q); - else - __guc_exec_queue_fini(guc, q); -} - -int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); - return 
-EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || - exec_queue_pending_enable(q) || exec_queue_enabled(q)) { - xe_gt_err(guc_to_gt(guc), - "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d", - atomic_read(&q->guc->state), q->guc->id); - return -EPROTO; - } - - handle_deregister_done(guc, q); - - return 0; -} - -int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); - - /* FIXME: Do error capture, most likely async */ - - trace_xe_exec_queue_reset(q); - - /* - * A banned engine is a NOP at this point (came from - * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel - * jobs by setting timeout of the job to the minimum value kicking - * guc_exec_queue_timedout_job. 
- */ - set_exec_queue_reset(q); - if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) - xe_guc_exec_queue_trigger_cleanup(q); - - return 0; -} - -int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, - u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); - - trace_xe_exec_queue_memory_cat_error(q); - - /* Treat the same as engine reset */ - set_exec_queue_reset(q); - if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) - xe_guc_exec_queue_trigger_cleanup(q); - - return 0; -} - -int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_device *xe = guc_to_xe(guc); - u8 guc_class, instance; - u32 reason; - - if (unlikely(len != 3)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - guc_class = msg[0]; - instance = msg[1]; - reason = msg[2]; - - /* Unexpected failure of a hardware feature, log an actual error */ - drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", - guc_class, instance, reason); - - xe_gt_reset_async(guc_to_gt(guc)); - - return 0; -} - -static void -guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, - struct xe_guc_submit_exec_queue_snapshot *snapshot) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); - int i; - - snapshot->guc.wqi_head = q->guc->wqi_head; - snapshot->guc.wqi_tail = q->guc->wqi_tail; - snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); - snapshot->parallel.wq_desc.tail = 
parallel_read(xe, map, wq_desc.tail); - snapshot->parallel.wq_desc.status = parallel_read(xe, map, - wq_desc.wq_status); - - if (snapshot->parallel.wq_desc.head != - snapshot->parallel.wq_desc.tail) { - for (i = snapshot->parallel.wq_desc.head; - i != snapshot->parallel.wq_desc.tail; - i = (i + sizeof(u32)) % WQ_SIZE) - snapshot->parallel.wq[i / sizeof(u32)] = - parallel_read(xe, map, wq[i / sizeof(u32)]); - } -} - -static void -guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, - struct drm_printer *p) -{ - int i; - - drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", - snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); - drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", - snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); - drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); - - if (snapshot->parallel.wq_desc.head != - snapshot->parallel.wq_desc.tail) { - for (i = snapshot->parallel.wq_desc.head; - i != snapshot->parallel.wq_desc.tail; - i = (i + sizeof(u32)) % WQ_SIZE) - drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), - snapshot->parallel.wq[i / sizeof(u32)]); - } -} - -/** - * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. - * @q: faulty exec queue - * - * This can be printed out in a later stage like during dev_coredump - * analysis. - * - * Returns: a GuC Submit Engine snapshot object that must be freed by the - * caller, using `xe_guc_exec_queue_snapshot_free`. 
- */ -struct xe_guc_submit_exec_queue_snapshot * -xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_guc_submit_exec_queue_snapshot *snapshot; - int i; - - snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); - - if (!snapshot) - return NULL; - - snapshot->guc.id = q->guc->id; - memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); - snapshot->class = q->class; - snapshot->logical_mask = q->logical_mask; - snapshot->width = q->width; - snapshot->refcount = kref_read(&q->refcount); - snapshot->sched_timeout = sched->base.timeout; - snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; - snapshot->sched_props.preempt_timeout_us = - q->sched_props.preempt_timeout_us; - - snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *), - GFP_ATOMIC); - - if (snapshot->lrc) { - for (i = 0; i < q->width; ++i) { - struct xe_lrc *lrc = q->lrc[i]; - - snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); - } - } - - snapshot->schedule_state = atomic_read(&q->guc->state); - snapshot->exec_queue_flags = q->flags; - - snapshot->parallel_execution = xe_exec_queue_is_parallel(q); - if (snapshot->parallel_execution) - guc_exec_queue_wq_snapshot_capture(q, snapshot); - - spin_lock(&sched->base.job_list_lock); - snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); - snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, - sizeof(struct pending_list_snapshot), - GFP_ATOMIC); - - if (snapshot->pending_list) { - struct xe_sched_job *job_iter; - - i = 0; - list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) { - snapshot->pending_list[i].seqno = - xe_sched_job_seqno(job_iter); - snapshot->pending_list[i].fence = - dma_fence_is_signaled(job_iter->fence) ? 1 : 0; - snapshot->pending_list[i].finished = - dma_fence_is_signaled(&job_iter->drm.s_fence->finished) - ? 
1 : 0; - i++; - } - } - - spin_unlock(&sched->base.job_list_lock); - - return snapshot; -} - -/** - * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. - * @snapshot: Previously captured snapshot of job. - * - * This captures some data that requires taking some locks, so it cannot be done in signaling path. - */ -void -xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) -{ - int i; - - if (!snapshot || !snapshot->lrc) - return; - - for (i = 0; i < snapshot->width; ++i) - xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); -} - -/** - * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. - * @snapshot: GuC Submit Engine snapshot object. - * @p: drm_printer where it will be printed out. - * - * This function prints out a given GuC Submit Engine snapshot object. - */ -void -xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, - struct drm_printer *p) -{ - int i; - - if (!snapshot) - return; - - drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id); - drm_printf(p, "\tName: %s\n", snapshot->name); - drm_printf(p, "\tClass: %d\n", snapshot->class); - drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); - drm_printf(p, "\tWidth: %d\n", snapshot->width); - drm_printf(p, "\tRef: %d\n", snapshot->refcount); - drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); - drm_printf(p, "\tTimeslice: %u (us)\n", - snapshot->sched_props.timeslice_us); - drm_printf(p, "\tPreempt timeout: %u (us)\n", - snapshot->sched_props.preempt_timeout_us); - - for (i = 0; snapshot->lrc && i < snapshot->width; ++i) - xe_lrc_snapshot_print(snapshot->lrc[i], p); - - drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); - drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); - - if (snapshot->parallel_execution) - guc_exec_queue_wq_snapshot_print(snapshot, p); - - for (i = 0; snapshot->pending_list && i < 
snapshot->pending_list_size; - i++) - drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", - snapshot->pending_list[i].seqno, - snapshot->pending_list[i].fence, - snapshot->pending_list[i].finished); -} - -/** - * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given - * snapshot. - * @snapshot: GuC Submit Engine snapshot object. - * - * This function free all the memory that needed to be allocated at capture - * time. - */ -void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) -{ - int i; - - if (!snapshot) - return; - - if (snapshot->lrc) { - for (i = 0; i < snapshot->width; i++) - xe_lrc_snapshot_free(snapshot->lrc[i]); - kfree(snapshot->lrc); - } - kfree(snapshot->pending_list); - kfree(snapshot); -} - -static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) -{ - struct xe_guc_submit_exec_queue_snapshot *snapshot; - - snapshot = xe_guc_exec_queue_snapshot_capture(q); - xe_guc_exec_queue_snapshot_print(snapshot, p); - xe_guc_exec_queue_snapshot_free(snapshot); -} - -/** - * xe_guc_submit_print - GuC Submit Print. - * @guc: GuC. - * @p: drm_printer where it will be printed out. - * - * This function capture and prints snapshots of **all** GuC Engines. 
- */ -void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) -{ - struct xe_exec_queue *q; - unsigned long index; - - if (!xe_device_uc_enabled(guc_to_xe(guc))) - return; - - mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - guc_exec_queue_print(q, p); - mutex_unlock(&guc->submission_state.lock); -} diff --git a/rr-cache/ec396c63eece0f0c56a405443dd6f67970e78a95/postimage b/rr-cache/ec396c63eece0f0c56a405443dd6f67970e78a95/postimage deleted file mode 100644 index 354ee9045efc..000000000000 --- a/rr-cache/ec396c63eece0f0c56a405443dd6f67970e78a95/postimage +++ /dev/null @@ -1,2521 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023-2024 Intel Corporation - */ - -#include <linux/anon_inodes.h> -#include <linux/delay.h> -#include <linux/nospec.h> -#include <linux/poll.h> - -#include <drm/drm_drv.h> -#include <drm/drm_managed.h> -#include <uapi/drm/xe_drm.h> - -#include "abi/guc_actions_slpc_abi.h" -#include "instructions/xe_mi_commands.h" -#include "regs/xe_engine_regs.h" -#include "regs/xe_gt_regs.h" -#include "regs/xe_lrc_layout.h" -#include "regs/xe_oa_regs.h" -#include "xe_assert.h" -#include "xe_bb.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_exec_queue.h" -#include "xe_force_wake.h" -#include "xe_gt.h" -#include "xe_gt_mcr.h" -#include "xe_gt_printk.h" -#include "xe_guc_pc.h" -#include "xe_lrc.h" -#include "xe_macros.h" -#include "xe_mmio.h" -#include "xe_oa.h" -#include "xe_observation.h" -#include "xe_pm.h" -#include "xe_sched_job.h" -#include "xe_sriov.h" - -#define DEFAULT_POLL_FREQUENCY_HZ 200 -#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) -#define XE_OA_UNIT_INVALID U32_MAX - -struct xe_oa_reg { - struct xe_reg addr; - u32 value; -}; - -struct xe_oa_config { - struct xe_oa *oa; - - char uuid[UUID_STRING_LEN + 1]; - int id; - - const struct xe_oa_reg *regs; - u32 regs_len; - - struct attribute_group sysfs_metric; - struct attribute 
*attrs[2]; - struct kobj_attribute sysfs_metric_id; - - struct kref ref; - struct rcu_head rcu; -}; - -struct flex { - struct xe_reg reg; - u32 offset; - u32 value; -}; - -struct xe_oa_open_param { - u32 oa_unit_id; - bool sample; - u32 metric_set; - enum xe_oa_format_name oa_format; - int period_exponent; - bool disabled; - int exec_queue_id; - int engine_instance; - struct xe_exec_queue *exec_q; - struct xe_hw_engine *hwe; - bool no_preempt; -}; - -struct xe_oa_config_bo { - struct llist_node node; - - struct xe_oa_config *oa_config; - struct xe_bb *bb; -}; - -#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x - -static const struct xe_oa_format oa_formats[] = { - [XE_OA_FORMAT_C4_B8] = { 7, 64, DRM_FMT(OAG) }, - [XE_OA_FORMAT_A12] = { 0, 64, DRM_FMT(OAG) }, - [XE_OA_FORMAT_A12_B8_C8] = { 2, 128, DRM_FMT(OAG) }, - [XE_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, - [XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAR) }, - [XE_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, - [XE_OAC_FORMAT_A24u64_B8_C8] = { 1, 320, DRM_FMT(OAC), HDR_64_BIT }, - [XE_OAC_FORMAT_A22u32_R2u32_B8_C8] = { 2, 192, DRM_FMT(OAC), HDR_64_BIT }, - [XE_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, DRM_FMT(OAM_MPEC), HDR_64_BIT }, - [XE_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, DRM_FMT(OAM_MPEC), HDR_64_BIT }, - [XE_OA_FORMAT_PEC64u64] = { 1, 576, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, - [XE_OA_FORMAT_PEC64u64_B8_C8] = { 1, 640, DRM_FMT(PEC), HDR_64_BIT, 1, 1 }, - [XE_OA_FORMAT_PEC64u32] = { 1, 320, DRM_FMT(PEC), HDR_64_BIT }, - [XE_OA_FORMAT_PEC32u64_G1] = { 5, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, - [XE_OA_FORMAT_PEC32u32_G1] = { 5, 192, DRM_FMT(PEC), HDR_64_BIT }, - [XE_OA_FORMAT_PEC32u64_G2] = { 6, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, - [XE_OA_FORMAT_PEC32u32_G2] = { 6, 192, DRM_FMT(PEC), HDR_64_BIT }, - [XE_OA_FORMAT_PEC36u64_G1_32_G2_4] = { 3, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, - [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, -}; - -static 
u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head) -{ - return tail >= head ? tail - head : - tail + stream->oa_buffer.circ_size - head; -} - -static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n) -{ - return ptr + n >= stream->oa_buffer.circ_size ? - ptr + n - stream->oa_buffer.circ_size : ptr + n; -} - -static void xe_oa_config_release(struct kref *ref) -{ - struct xe_oa_config *oa_config = - container_of(ref, typeof(*oa_config), ref); - - kfree(oa_config->regs); - - kfree_rcu(oa_config, rcu); -} - -static void xe_oa_config_put(struct xe_oa_config *oa_config) -{ - if (!oa_config) - return; - - kref_put(&oa_config->ref, xe_oa_config_release); -} - -static struct xe_oa_config *xe_oa_config_get(struct xe_oa_config *oa_config) -{ - return kref_get_unless_zero(&oa_config->ref) ? oa_config : NULL; -} - -static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_set) -{ - struct xe_oa_config *oa_config; - - rcu_read_lock(); - oa_config = idr_find(&oa->metrics_idr, metrics_set); - if (oa_config) - oa_config = xe_oa_config_get(oa_config); - rcu_read_unlock(); - - return oa_config; -} - -static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo) -{ - xe_oa_config_put(oa_bo->oa_config); - xe_bb_free(oa_bo->bb, NULL); - kfree(oa_bo); -} - -static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) -{ - return &stream->hwe->oa_unit->regs; -} - -static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) -{ - return xe_mmio_read32(&stream->gt->mmio, __oa_regs(stream)->oa_tail_ptr) & - OAG_OATAILPTR_MASK; -} - -#define oa_report_header_64bit(__s) \ - ((__s)->oa_buffer.format->header == HDR_64_BIT) - -static u64 oa_report_id(struct xe_oa_stream *stream, void *report) -{ - return oa_report_header_64bit(stream) ? 
*(u64 *)report : *(u32 *)report; -} - -static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) -{ - if (oa_report_header_64bit(stream)) - *(u64 *)report = 0; - else - *report = 0; -} - -static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) -{ - return oa_report_header_64bit(stream) ? - *((u64 *)report + 1) : - *((u32 *)report + 1); -} - -static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) -{ - if (oa_report_header_64bit(stream)) - *(u64 *)&report[2] = 0; - else - report[1] = 0; -} - -static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) -{ - u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - int report_size = stream->oa_buffer.format->size; - u32 tail, hw_tail; - unsigned long flags; - bool pollin; - u32 partial_report_size; - - spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - - hw_tail = xe_oa_hw_tail_read(stream); - hw_tail -= gtt_offset; - - /* - * The tail pointer increases in 64 byte (cacheline size), not in report_size - * increments. Also report size may not be a power of 2. Compute potential - * partially landed report in OA buffer. - */ - partial_report_size = xe_oa_circ_diff(stream, hw_tail, stream->oa_buffer.tail); - partial_report_size %= report_size; - - /* Subtract partial amount off the tail */ - hw_tail = xe_oa_circ_diff(stream, hw_tail, partial_report_size); - - tail = hw_tail; - - /* - * Walk the stream backward until we find a report with report id and timestamp - * not 0. We can't tell whether a report has fully landed in memory before the - * report id and timestamp of the following report have landed. - * - * This is assuming that the writes of the OA unit land in memory in the order - * they were written. 
If not : (╯°□°)╯︵ ┻━┻ - */ - while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) { - void *report = stream->oa_buffer.vaddr + tail; - - if (oa_report_id(stream, report) || oa_timestamp(stream, report)) - break; - - tail = xe_oa_circ_diff(stream, tail, report_size); - } - - if (xe_oa_circ_diff(stream, hw_tail, tail) > report_size) - drm_dbg(&stream->oa->xe->drm, - "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", - stream->oa_buffer.head, tail, hw_tail); - - stream->oa_buffer.tail = tail; - - pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail, - stream->oa_buffer.head) >= report_size; - - spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - - return pollin; -} - -static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) -{ - struct xe_oa_stream *stream = - container_of(hrtimer, typeof(*stream), poll_check_timer); - - if (xe_oa_buffer_check_unlocked(stream)) { - stream->pollin = true; - wake_up(&stream->poll_wq); - } - - hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); - - return HRTIMER_RESTART; -} - -static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, - size_t count, size_t *offset, const u8 *report) -{ - int report_size = stream->oa_buffer.format->size; - int report_size_partial; - u8 *oa_buf_end; - - if ((count - *offset) < report_size) - return -ENOSPC; - - buf += *offset; - - oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; - report_size_partial = oa_buf_end - report; - - if (report_size_partial < report_size) { - if (copy_to_user(buf, report, report_size_partial)) - return -EFAULT; - buf += report_size_partial; - - if (copy_to_user(buf, stream->oa_buffer.vaddr, - report_size - report_size_partial)) - return -EFAULT; - } else if (copy_to_user(buf, report, report_size)) { - return -EFAULT; - } - - *offset += report_size; - - return 0; -} - -static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, - size_t count, 
size_t *offset) -{ - int report_size = stream->oa_buffer.format->size; - u8 *oa_buf_base = stream->oa_buffer.vaddr; - u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - size_t start_offset = *offset; - unsigned long flags; - u32 head, tail; - int ret = 0; - - spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - head = stream->oa_buffer.head; - tail = stream->oa_buffer.tail; - spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - - xe_assert(stream->oa->xe, - head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size); - - for (; xe_oa_circ_diff(stream, tail, head); - head = xe_oa_circ_incr(stream, head, report_size)) { - u8 *report = oa_buf_base + head; - - ret = xe_oa_append_report(stream, buf, count, offset, report); - if (ret) - break; - - if (!(stream->oa_buffer.circ_size % report_size)) { - /* Clear out report id and timestamp to detect unlanded reports */ - oa_report_id_clear(stream, (void *)report); - oa_timestamp_clear(stream, (void *)report); - } else { - u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; - u32 part = oa_buf_end - report; - - /* Zero out the entire report */ - if (report_size <= part) { - memset(report, 0, report_size); - } else { - memset(report, 0, part); - memset(oa_buf_base, 0, report_size - part); - } - } - } - - if (start_offset != *offset) { - struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr; - - spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - xe_mmio_write32(&stream->gt->mmio, oaheadptr, - (head + gtt_offset) & OAG_OAHEADPTR_MASK); - stream->oa_buffer.head = head; - spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - } - - return ret; -} - -static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) -{ - struct xe_mmio *mmio = &stream->gt->mmio; - u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; - unsigned long flags; - - spin_lock_irqsave(&stream->oa_buffer.ptr_lock, 
flags); - - xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0); - xe_mmio_write32(mmio, __oa_regs(stream)->oa_head_ptr, - gtt_offset & OAG_OAHEADPTR_MASK); - stream->oa_buffer.head = 0; - /* - * PRM says: "This MMIO must be set before the OATAILPTR register and after the - * OAHEADPTR register. This is to enable proper functionality of the overflow bit". - */ - xe_mmio_write32(mmio, __oa_regs(stream)->oa_buffer, oa_buf); - xe_mmio_write32(mmio, __oa_regs(stream)->oa_tail_ptr, - gtt_offset & OAG_OATAILPTR_MASK); - - /* Mark that we need updated tail pointer to read from */ - stream->oa_buffer.tail = 0; - - spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - - /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ - memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); -} - -static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) -{ - return ((format->counter_select << (ffs(counter_sel_mask) - 1)) & counter_sel_mask) | - REG_FIELD_PREP(OA_OACONTROL_REPORT_BC_MASK, format->bc_report) | - REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size); -} - -static u32 __oa_ccs_select(struct xe_oa_stream *stream) -{ - u32 val; - - if (stream->hwe->class != XE_ENGINE_CLASS_COMPUTE) - return 0; - - val = REG_FIELD_PREP(OAG_OACONTROL_OA_CCS_SELECT_MASK, stream->hwe->instance); - xe_assert(stream->oa->xe, - REG_FIELD_GET(OAG_OACONTROL_OA_CCS_SELECT_MASK, val) == stream->hwe->instance); - return val; -} - -static void xe_oa_enable(struct xe_oa_stream *stream) -{ - const struct xe_oa_format *format = stream->oa_buffer.format; - const struct xe_oa_regs *regs; - u32 val; - - /* - * BSpec: 46822: Bit 0. 
Even if stream->sample is 0, for OAR to function, the OA - * buffer must be correctly initialized - */ - xe_oa_init_oa_buffer(stream); - - regs = __oa_regs(stream); - val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) | - __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; - - if (GRAPHICS_VER(stream->oa->xe) >= 20 && - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) - val |= OAG_OACONTROL_OA_PES_DISAG_EN; - - xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val); -} - -static void xe_oa_disable(struct xe_oa_stream *stream) -{ - struct xe_mmio *mmio = &stream->gt->mmio; - - xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0); - if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl, - OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) - drm_err(&stream->oa->xe->drm, - "wait for OA to be disabled timed out\n"); - - if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) { - /* <= XE_METEORLAKE except XE_PVC */ - xe_mmio_write32(mmio, OA_TLB_INV_CR, 1); - if (xe_mmio_wait32(mmio, OA_TLB_INV_CR, 1, 0, 50000, NULL, false)) - drm_err(&stream->oa->xe->drm, - "wait for OA tlb invalidate timed out\n"); - } -} - -static int xe_oa_wait_unlocked(struct xe_oa_stream *stream) -{ - /* We might wait indefinitely if periodic sampling is not enabled */ - if (!stream->periodic) - return -EINVAL; - - return wait_event_interruptible(stream->poll_wq, - xe_oa_buffer_check_unlocked(stream)); -} - -#define OASTATUS_RELEVANT_BITS (OASTATUS_MMIO_TRG_Q_FULL | OASTATUS_COUNTER_OVERFLOW | \ - OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST) - -static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf, - size_t count, size_t *offset) -{ - /* Only clear our bits to avoid side-effects */ - stream->oa_status = xe_mmio_rmw32(&stream->gt->mmio, __oa_regs(stream)->oa_status, - OASTATUS_RELEVANT_BITS, 0); - /* - * Signal to userspace that there is non-zero OA status to read via - * @DRM_XE_OBSERVATION_IOCTL_STATUS 
observation stream fd ioctl - */ - if (stream->oa_status & OASTATUS_RELEVANT_BITS) - return -EIO; - - return xe_oa_append_reports(stream, buf, count, offset); -} - -static ssize_t xe_oa_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct xe_oa_stream *stream = file->private_data; - size_t offset = 0; - int ret; - - /* Can't read from disabled streams */ - if (!stream->enabled || !stream->sample) - return -EINVAL; - - if (!(file->f_flags & O_NONBLOCK)) { - do { - ret = xe_oa_wait_unlocked(stream); - if (ret) - return ret; - - mutex_lock(&stream->stream_lock); - ret = __xe_oa_read(stream, buf, count, &offset); - mutex_unlock(&stream->stream_lock); - } while (!offset && !ret); - } else { - mutex_lock(&stream->stream_lock); - ret = __xe_oa_read(stream, buf, count, &offset); - mutex_unlock(&stream->stream_lock); - } - - /* - * Typically we clear pollin here in order to wait for the new hrtimer callback - * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC, - * which means that more OA data is available than could fit in the user provided - * buffer. In this case we want the next poll() call to not block. - * - * Also in case of -EIO, we have already waited for data before returning - * -EIO, so need to wait again - */ - if (ret != -ENOSPC && ret != -EIO) - stream->pollin = false; - - /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */ - return offset ?: (ret ?: -EAGAIN); -} - -static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, - struct file *file, poll_table *wait) -{ - __poll_t events = 0; - - poll_wait(file, &stream->poll_wq, wait); - - /* - * We don't explicitly check whether there's something to read here since this - * path may be hot depending on what else userspace is polling, or on the timeout - * in use. 
We rely on hrtimer xe_oa_poll_check_timer_cb to notify us when there - * are samples to read - */ - if (stream->pollin) - events |= EPOLLIN; - - return events; -} - -static __poll_t xe_oa_poll(struct file *file, poll_table *wait) -{ - struct xe_oa_stream *stream = file->private_data; - __poll_t ret; - - mutex_lock(&stream->stream_lock); - ret = xe_oa_poll_locked(stream, file, wait); - mutex_unlock(&stream->stream_lock); - - return ret; -} - -static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) -{ - struct xe_sched_job *job; - struct dma_fence *fence; - long timeout; - int err = 0; - - /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ - job = xe_bb_create_job(stream->k_exec_q, bb); - if (IS_ERR(job)) { - err = PTR_ERR(job); - goto exit; - } - - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); - - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); - if (timeout < 0) - err = timeout; - else if (!timeout) - err = -ETIME; -exit: - return err; -} - -static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs) -{ - u32 i; - -#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) - - for (i = 0; i < n_regs; i++) { - if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { - u32 n_lri = min_t(u32, n_regs - i, - MI_LOAD_REGISTER_IMM_MAX_REGS); - - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(n_lri); - } - bb->cs[bb->len++] = reg_data[i].addr.addr; - bb->cs[bb->len++] = reg_data[i].value; - } -} - -static int num_lri_dwords(int num_regs) -{ - int count = 0; - - if (num_regs > 0) { - count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS); - count += num_regs * 2; - } - - return count; -} - -static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream) -{ - xe_bo_unpin_map_no_vm(stream->oa_buffer.bo); -} - -static void xe_oa_free_configs(struct xe_oa_stream *stream) -{ - struct xe_oa_config_bo *oa_bo, *tmp; - - 
xe_oa_config_put(stream->oa_config); - llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) - free_oa_config_bo(oa_bo); -} - -static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, - struct xe_bb *bb, const struct flex *flex, u32 count) -{ - u32 offset = xe_bo_ggtt_addr(lrc->bo); - - do { - bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); - bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = flex->value; - - } while (flex++, --count); -} - -static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, - const struct flex *flex, u32 count) -{ - struct xe_bb *bb; - int err; - - bb = xe_bb_new(stream->gt, 4 * count, false); - if (IS_ERR(bb)) { - err = PTR_ERR(bb); - goto exit; - } - - xe_oa_store_flex(stream, lrc, bb, flex, count); - - err = xe_oa_submit_bb(stream, bb); - xe_bb_free(bb, NULL); -exit: - return err; -} - -static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) -{ - struct xe_bb *bb; - int err; - - bb = xe_bb_new(stream->gt, 3, false); - if (IS_ERR(bb)) { - err = PTR_ERR(bb); - goto exit; - } - - write_cs_mi_lri(bb, reg_lri, 1); - - err = xe_oa_submit_bb(stream, bb); - xe_bb_free(bb, NULL); -exit: - return err; -} - -static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) -{ - const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); - u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | - (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - - struct flex regs_context[] = { - { - OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, - enable ? 
OA_COUNTER_RESUME : 0, - }, - { - RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) - }, - }; - struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; - int err; - - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, ®_lri); -} - -static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) -{ - const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); - u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | - (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - struct flex regs_context[] = { - { - OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, - enable ? OA_COUNTER_RESUME : 0, - }, - { - RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | - _MASKED_FIELD(CTX_CTRL_RUN_ALONE, - enable ? 
CTX_CTRL_RUN_ALONE : 0), - }, - }; - struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; - int err; - - /* Set ccs select to enable programming of OAC_OACONTROL */ - xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl, - __oa_ccs_select(stream)); - - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, ®_lri); -} - -static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) -{ - switch (stream->hwe->class) { - case XE_ENGINE_CLASS_RENDER: - return xe_oa_configure_oar_context(stream, enable); - case XE_ENGINE_CLASS_COMPUTE: - return xe_oa_configure_oac_context(stream, enable); - default: - /* Video engines do not support MI_REPORT_PERF_COUNT */ - return 0; - } -} - -#define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255) - -static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool enable) -{ - return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_MMIO_TRG, - enable && stream && stream->sample ? - 0 : OAG_OA_DEBUG_DISABLE_MMIO_TRG); -} - -static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) -{ - struct xe_mmio *mmio = &stream->gt->mmio; - u32 sqcnt1; - - /* - * Wa_1508761755:xehpsdv, dg2 - * Enable thread stall DOP gating and EU DOP gating. - */ - if (stream->oa->xe->info.platform == XE_DG2) { - xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, - _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE)); - xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, - _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); - } - - xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, - oag_configure_mmio_trigger(stream, false)); - - /* disable the context save/restore or OAR counters */ - if (stream->exec_q) - xe_oa_configure_oa_context(stream, false); - - /* Make sure we disable noa to save power. 
*/ - xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0); - - sqcnt1 = SQCNT1_PMON_ENABLE | - (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); - - /* Reset PMON Enable to save power. */ - xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0); -} - -static void xe_oa_stream_destroy(struct xe_oa_stream *stream) -{ - struct xe_oa_unit *u = stream->hwe->oa_unit; - struct xe_gt *gt = stream->hwe->gt; - - if (WARN_ON(stream != u->exclusive_stream)) - return; - - WRITE_ONCE(u->exclusive_stream, NULL); - - mutex_destroy(&stream->stream_lock); - - xe_oa_disable_metric_set(stream); - xe_exec_queue_put(stream->k_exec_q); - - xe_oa_free_oa_buffer(stream); - - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_pm_runtime_put(stream->oa->xe); - - /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ - if (stream->override_gucrc) - xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); - - xe_oa_free_configs(stream); -} - -static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) -{ - struct xe_bo *bo; - - BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE); - BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M); - - bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, - XE_OA_BUFFER_SIZE, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); - if (IS_ERR(bo)) - return PTR_ERR(bo); - - stream->oa_buffer.bo = bo; - /* mmap implementation requires OA buffer to be in system memory */ - xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0); - stream->oa_buffer.vaddr = bo->vmap.vaddr; - return 0; -} - -static struct xe_oa_config_bo * -__xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config) -{ - struct xe_oa_config_bo *oa_bo; - size_t config_length; - struct xe_bb *bb; - - oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); - if (!oa_bo) - return ERR_PTR(-ENOMEM); - - config_length = num_lri_dwords(oa_config->regs_len); - config_length = ALIGN(sizeof(u32) * config_length, XE_PAGE_SIZE) 
/ sizeof(u32); - - bb = xe_bb_new(stream->gt, config_length, false); - if (IS_ERR(bb)) - goto err_free; - - write_cs_mi_lri(bb, oa_config->regs, oa_config->regs_len); - - oa_bo->bb = bb; - oa_bo->oa_config = xe_oa_config_get(oa_config); - llist_add(&oa_bo->node, &stream->oa_config_bos); - - return oa_bo; -err_free: - kfree(oa_bo); - return ERR_CAST(bb); -} - -static struct xe_oa_config_bo * -xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config) -{ - struct xe_oa_config_bo *oa_bo; - - /* Look for the buffer in the already allocated BOs attached to the stream */ - llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { - if (oa_bo->oa_config == oa_config && - memcmp(oa_bo->oa_config->uuid, oa_config->uuid, - sizeof(oa_config->uuid)) == 0) - goto out; - } - - oa_bo = __xe_oa_alloc_config_buffer(stream, oa_config); -out: - return oa_bo; -} - -static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config) -{ -#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 - struct xe_oa_config_bo *oa_bo; - int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US; - - oa_bo = xe_oa_alloc_config_buffer(stream, config); - if (IS_ERR(oa_bo)) { - err = PTR_ERR(oa_bo); - goto exit; - } - - err = xe_oa_submit_bb(stream, oa_bo->bb); - - /* Additional empirical delay needed for NOA programming after registers are written */ - usleep_range(us, 2 * us); -exit: - return err; -} - -static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) -{ - /* If user didn't require OA reports, ask HW not to emit ctx switch reports */ - return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS, - stream->sample ? - 0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); -} - -static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) -{ - struct xe_mmio *mmio = &stream->gt->mmio; - u32 oa_debug, sqcnt1; - int ret; - - /* - * Wa_1508761755:xehpsdv, dg2 - * EU NOA signals behave incorrectly if EU clock gating is enabled. 
- * Disable thread stall DOP gating and EU DOP gating. - */ - if (stream->oa->xe->info.platform == XE_DG2) { - xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); - xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DISABLE_DOP_GATING)); - } - - /* Disable clk ratio reports */ - oa_debug = OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | - OAG_OA_DEBUG_INCLUDE_CLK_RATIO; - - if (GRAPHICS_VER(stream->oa->xe) >= 20) - oa_debug |= - /* The three bits below are needed to get PEC counters running */ - OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL | - OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL | - OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL; - - xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, - _MASKED_BIT_ENABLE(oa_debug) | - oag_report_ctx_switches(stream) | - oag_configure_mmio_trigger(stream, true)); - - xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? - (OAG_OAGLBCTXCTRL_COUNTER_RESUME | - OAG_OAGLBCTXCTRL_TIMER_ENABLE | - REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK, - stream->period_exponent)) : 0); - - /* - * Initialize Super Queue Internal Cnt Register - * Set PMON Enable in order to collect valid metrics - * Enable bytes per clock reporting - */ - sqcnt1 = SQCNT1_PMON_ENABLE | - (HAS_OA_BPC_REPORTING(stream->oa->xe) ? 
SQCNT1_OABPC : 0); - - xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1); - - /* Configure OAR/OAC */ - if (stream->exec_q) { - ret = xe_oa_configure_oa_context(stream, true); - if (ret) - return ret; - } - - return xe_oa_emit_oa_config(stream, stream->oa_config); -} - -static void xe_oa_stream_enable(struct xe_oa_stream *stream) -{ - stream->pollin = false; - - xe_oa_enable(stream); - - if (stream->sample) - hrtimer_start(&stream->poll_check_timer, - ns_to_ktime(stream->poll_period_ns), - HRTIMER_MODE_REL_PINNED); -} - -static void xe_oa_stream_disable(struct xe_oa_stream *stream) -{ - xe_oa_disable(stream); - - if (stream->sample) - hrtimer_cancel(&stream->poll_check_timer); -} - -static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream) -{ - struct xe_exec_queue *q = stream->exec_q; - int ret1, ret2; - - /* Best effort recovery: try to revert both to original, irrespective of error */ - ret1 = q->ops->set_timeslice(q, stream->hwe->eclass->sched_props.timeslice_us); - ret2 = q->ops->set_preempt_timeout(q, stream->hwe->eclass->sched_props.preempt_timeout_us); - if (ret1 || ret2) - goto err; - return 0; -err: - drm_dbg(&stream->oa->xe->drm, "%s failed ret1 %d ret2 %d\n", __func__, ret1, ret2); - return ret1 ?: ret2; -} - -static int xe_oa_disable_preempt_timeslice(struct xe_oa_stream *stream) -{ - struct xe_exec_queue *q = stream->exec_q; - int ret; - - /* Setting values to 0 will disable timeslice and preempt_timeout */ - ret = q->ops->set_timeslice(q, 0); - if (ret) - goto err; - - ret = q->ops->set_preempt_timeout(q, 0); - if (ret) - goto err; - - return 0; -err: - xe_oa_enable_preempt_timeslice(stream); - drm_dbg(&stream->oa->xe->drm, "%s failed %d\n", __func__, ret); - return ret; -} - -static int xe_oa_enable_locked(struct xe_oa_stream *stream) -{ - if (stream->enabled) - return 0; - - if (stream->no_preempt) { - int ret = xe_oa_disable_preempt_timeslice(stream); - - if (ret) - return ret; - } - - xe_oa_stream_enable(stream); - - stream->enabled = 
true; - return 0; -} - -static int xe_oa_disable_locked(struct xe_oa_stream *stream) -{ - int ret = 0; - - if (!stream->enabled) - return 0; - - xe_oa_stream_disable(stream); - - if (stream->no_preempt) - ret = xe_oa_enable_preempt_timeslice(stream); - - stream->enabled = false; - return ret; -} - -static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) -{ - struct drm_xe_ext_set_property ext; - long ret = stream->oa_config->id; - struct xe_oa_config *config; - int err; - - err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext)); - if (XE_IOCTL_DBG(stream->oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) || - XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) || - XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) || - XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET)) - return -EINVAL; - - config = xe_oa_get_oa_config(stream->oa, ext.value); - if (!config) - return -ENODEV; - - if (config != stream->oa_config) { - err = xe_oa_emit_oa_config(stream, config); - if (!err) - config = xchg(&stream->oa_config, config); - else - ret = err; - } - - xe_oa_config_put(config); - - return ret; -} - -static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) -{ - struct drm_xe_oa_stream_status status = {}; - void __user *uaddr = (void __user *)arg; - - /* Map from register to uapi bits */ - if (stream->oa_status & OASTATUS_REPORT_LOST) - status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST; - if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW) - status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW; - if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW) - status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW; - if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL) - status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL; - - if (copy_to_user(uaddr, &status, sizeof(status))) - return -EFAULT; - - return 0; -} - -static long xe_oa_info_locked(struct xe_oa_stream 
*stream, unsigned long arg) -{ - struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; - void __user *uaddr = (void __user *)arg; - - if (copy_to_user(uaddr, &info, sizeof(info))) - return -EFAULT; - - return 0; -} - -static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, - unsigned int cmd, - unsigned long arg) -{ - switch (cmd) { - case DRM_XE_OBSERVATION_IOCTL_ENABLE: - return xe_oa_enable_locked(stream); - case DRM_XE_OBSERVATION_IOCTL_DISABLE: - return xe_oa_disable_locked(stream); - case DRM_XE_OBSERVATION_IOCTL_CONFIG: - return xe_oa_config_locked(stream, arg); - case DRM_XE_OBSERVATION_IOCTL_STATUS: - return xe_oa_status_locked(stream, arg); - case DRM_XE_OBSERVATION_IOCTL_INFO: - return xe_oa_info_locked(stream, arg); - } - - return -EINVAL; -} - -static long xe_oa_ioctl(struct file *file, - unsigned int cmd, - unsigned long arg) -{ - struct xe_oa_stream *stream = file->private_data; - long ret; - - mutex_lock(&stream->stream_lock); - ret = xe_oa_ioctl_locked(stream, cmd, arg); - mutex_unlock(&stream->stream_lock); - - return ret; -} - -static void xe_oa_destroy_locked(struct xe_oa_stream *stream) -{ - if (stream->enabled) - xe_oa_disable_locked(stream); - - xe_oa_stream_destroy(stream); - - if (stream->exec_q) - xe_exec_queue_put(stream->exec_q); - - kfree(stream); -} - -static int xe_oa_release(struct inode *inode, struct file *file) -{ - struct xe_oa_stream *stream = file->private_data; - struct xe_gt *gt = stream->gt; - - mutex_lock(>->oa.gt_lock); - xe_oa_destroy_locked(stream); - mutex_unlock(>->oa.gt_lock); - - /* Release the reference the OA stream kept on the driver */ - drm_dev_put(>_to_xe(gt)->drm); - - return 0; -} - -static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct xe_oa_stream *stream = file->private_data; - struct xe_bo *bo = stream->oa_buffer.bo; - unsigned long start = vma->vm_start; - int i, ret; - - if (xe_observation_paranoid && !perfmon_capable()) { - 
drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n"); - return -EACCES; - } - - /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) { - drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); - return -EINVAL; - } - - /* - * Only support VM_READ, enforce MAP_PRIVATE by checking for - * VM_MAYSHARE, don't copy the vma on fork - */ - if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_SHARED | VM_MAYSHARE)) { - drm_dbg(&stream->oa->xe->drm, "mmap must be read only\n"); - return -EINVAL; - } - vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY, - VM_MAYWRITE | VM_MAYEXEC); - - xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma)); - for (i = 0; i < bo->ttm.ttm->num_pages; i++) { - ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]), - PAGE_SIZE, vma->vm_page_prot); - if (ret) - break; - - start += PAGE_SIZE; - } - - return ret; -} - -static const struct file_operations xe_oa_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .release = xe_oa_release, - .poll = xe_oa_poll, - .read = xe_oa_read, - .unlocked_ioctl = xe_oa_ioctl, - .mmap = xe_oa_mmap, -}; - -static bool engine_supports_mi_query(struct xe_hw_engine *hwe) -{ - return hwe->class == XE_ENGINE_CLASS_RENDER || - hwe->class == XE_ENGINE_CLASS_COMPUTE; -} - -static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end) -{ - u32 idx = *offset; - u32 len = min(MI_LRI_LEN(state[idx]) + idx, end); - bool found = false; - - idx++; - for (; idx < len; idx += 2) { - if (state[idx] == reg) { - found = true; - break; - } - } - - *offset = idx; - return found; -} - -#define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \ - REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM)) - -static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg) -{ - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 len = 
(xe_gt_lrc_size(stream->gt, stream->hwe->class) + - lrc->ring.size) / sizeof(u32); - u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32); - u32 *state = (u32 *)lrc->bo->vmap.vaddr; - - if (drm_WARN_ON(&stream->oa->xe->drm, !state)) - return U32_MAX; - - for (; offset < len; ) { - if (IS_MI_LRI_CMD(state[offset])) { - /* - * We expect reg-value pairs in MI_LRI command, so - * MI_LRI_LEN() should be even - */ - drm_WARN_ON(&stream->oa->xe->drm, - MI_LRI_LEN(state[offset]) & 0x1); - - if (xe_oa_find_reg_in_lri(state, reg, &offset, len)) - break; - } else { - offset++; - } - } - - return offset < len ? offset : U32_MAX; -} - -static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream) -{ - struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base); - u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class]; - - /* Do this only once. Failure is stored as offset of U32_MAX */ - if (offset) - goto exit; - - offset = xe_oa_context_image_offset(stream, reg.addr); - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset; - - drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n", - stream->hwe->name, offset); -exit: - return offset && offset != U32_MAX ? 
0 : -ENODEV; -} - -static int xe_oa_stream_init(struct xe_oa_stream *stream, - struct xe_oa_open_param *param) -{ - struct xe_oa_unit *u = param->hwe->oa_unit; - struct xe_gt *gt = param->hwe->gt; - int ret; - - stream->exec_q = param->exec_q; - stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS; - stream->hwe = param->hwe; - stream->gt = stream->hwe->gt; - stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; - - stream->sample = param->sample; - stream->periodic = param->period_exponent > 0; - stream->period_exponent = param->period_exponent; - stream->no_preempt = param->no_preempt; - - /* - * For Xe2+, when overrun mode is enabled, there are no partial reports at the end - * of buffer, making the OA buffer effectively a non-power-of-2 size circular - * buffer whose size, circ_size, is a multiple of the report size - */ - if (GRAPHICS_VER(stream->oa->xe) >= 20 && - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) - stream->oa_buffer.circ_size = - XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size; - else - stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE; - - if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { - /* If we don't find the context offset, just return error */ - ret = xe_oa_set_ctx_ctrl_offset(stream); - if (ret) { - drm_err(&stream->oa->xe->drm, - "xe_oa_set_ctx_ctrl_offset failed for %s\n", - stream->hwe->name); - goto exit; - } - } - - stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set); - if (!stream->oa_config) { - drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set); - ret = -EINVAL; - goto exit; - } - - /* - * Wa_1509372804:pvc - * - * GuC reset of engines causes OA to lose configuration - * state. Prevent this by overriding GUCRC mode. 
- */ - if (stream->oa->xe->info.platform == XE_PVC) { - ret = xe_guc_pc_override_gucrc_mode(>->uc.guc.pc, - SLPC_GUCRC_MODE_GUCRC_NO_RC6); - if (ret) - goto err_free_configs; - - stream->override_gucrc = true; - } - - /* Take runtime pm ref and forcewake to disable RC6 */ - xe_pm_runtime_get(stream->oa->xe); - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - - ret = xe_oa_alloc_oa_buffer(stream); - if (ret) - goto err_fw_put; - - stream->k_exec_q = xe_exec_queue_create(stream->oa->xe, NULL, - BIT(stream->hwe->logical_instance), 1, - stream->hwe, EXEC_QUEUE_FLAG_KERNEL, 0); - if (IS_ERR(stream->k_exec_q)) { - ret = PTR_ERR(stream->k_exec_q); - drm_err(&stream->oa->xe->drm, "gt%d, hwe %s, xe_exec_queue_create failed=%d", - stream->gt->info.id, stream->hwe->name, ret); - goto err_free_oa_buf; - } - - ret = xe_oa_enable_metric_set(stream); - if (ret) { - drm_dbg(&stream->oa->xe->drm, "Unable to enable metric set\n"); - goto err_put_k_exec_q; - } - - drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n", - stream->oa_config->uuid); - - WRITE_ONCE(u->exclusive_stream, stream); - - hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - stream->poll_check_timer.function = xe_oa_poll_check_timer_cb; - init_waitqueue_head(&stream->poll_wq); - - spin_lock_init(&stream->oa_buffer.ptr_lock); - mutex_init(&stream->stream_lock); - - return 0; - -err_put_k_exec_q: - xe_oa_disable_metric_set(stream); - xe_exec_queue_put(stream->k_exec_q); -err_free_oa_buf: - xe_oa_free_oa_buffer(stream); -err_fw_put: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_pm_runtime_put(stream->oa->xe); - if (stream->override_gucrc) - xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); -err_free_configs: - xe_oa_free_configs(stream); -exit: - return ret; -} - -static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, - struct xe_oa_open_param *param) -{ - struct xe_oa_stream *stream; - int stream_fd; - int ret; - - /* We 
currently only allow exclusive access */ - if (param->hwe->oa_unit->exclusive_stream) { - drm_dbg(&oa->xe->drm, "OA unit already in use\n"); - ret = -EBUSY; - goto exit; - } - - stream = kzalloc(sizeof(*stream), GFP_KERNEL); - if (!stream) { - ret = -ENOMEM; - goto exit; - } - - stream->oa = oa; - ret = xe_oa_stream_init(stream, param); - if (ret) - goto err_free; - - if (!param->disabled) { - ret = xe_oa_enable_locked(stream); - if (ret) - goto err_destroy; - } - - stream_fd = anon_inode_getfd("[xe_oa]", &xe_oa_fops, stream, 0); - if (stream_fd < 0) { - ret = stream_fd; - goto err_disable; - } - - /* Hold a reference on the drm device till stream_fd is released */ - drm_dev_get(&stream->oa->xe->drm); - - return stream_fd; -err_disable: - if (!param->disabled) - xe_oa_disable_locked(stream); -err_destroy: - xe_oa_stream_destroy(stream); -err_free: - kfree(stream); -exit: - return ret; -} - -/** - * xe_oa_timestamp_frequency - Return OA timestamp frequency - * @gt: @xe_gt - * - * OA timestamp frequency = CS timestamp frequency in most platforms. On some - * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such - * cases, return the adjusted CS timestamp frequency to the user. 
- */ -u32 xe_oa_timestamp_frequency(struct xe_gt *gt) -{ - u32 reg, shift; - - /* - * Wa_18013179988:dg2 - * Wa_14015568240:pvc - * Wa_14015846243:mtl - */ - switch (gt_to_xe(gt)->info.platform) { - case XE_DG2: - case XE_PVC: - case XE_METEORLAKE: - xe_pm_runtime_get(gt_to_xe(gt)); - reg = xe_mmio_read32(>->mmio, RPM_CONFIG0); - xe_pm_runtime_put(gt_to_xe(gt)); - - shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); - return gt->info.reference_clock << (3 - shift); - - default: - return gt->info.reference_clock; - } -} - -static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) -{ - u64 nom = (2ULL << exponent) * NSEC_PER_SEC; - u32 den = xe_oa_timestamp_frequency(gt); - - return div_u64(nom + den - 1, den); -} - -static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) -{ - switch (hwe->oa_unit->type) { - case DRM_XE_OA_UNIT_TYPE_OAG: - return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || - type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; - case DRM_XE_OA_UNIT_TYPE_OAM: - return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; - default: - return false; - } -} - -static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) -{ - u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); - u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); - u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); - u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); - int idx; - - for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { - const struct xe_oa_format *f = &oa->oa_formats[idx]; - - if (counter_size == f->counter_size && bc_report == f->bc_report && - type == f->type && counter_sel == f->counter_select) { - *name = idx; - return 0; - } - } - - return -EINVAL; -} - -/** - * xe_oa_unit_id - Return OA unit ID for a hardware engine - * @hwe: @xe_hw_engine - * - * Return OA unit ID for a hardware 
engine when available - */ -u16 xe_oa_unit_id(struct xe_hw_engine *hwe) -{ - return hwe->oa_unit && hwe->oa_unit->num_engines ? - hwe->oa_unit->oa_unit_id : U16_MAX; -} - -static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) -{ - struct xe_gt *gt; - int i, ret = 0; - - if (param->exec_q) { - /* When we have an exec_q, get hwe from the exec_q */ - param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, - param->engine_instance, true); - } else { - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - /* Else just get the first hwe attached to the oa unit */ - for_each_gt(gt, oa->xe, i) { - for_each_hw_engine(hwe, gt, id) { - if (xe_oa_unit_id(hwe) == param->oa_unit_id) { - param->hwe = hwe; - goto out; - } - } - } - } -out: - if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) { - drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", - param->exec_q ? param->exec_q->class : -1, - param->engine_instance, param->oa_unit_id); - ret = -EINVAL; - } - - return ret; -} - -static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - if (value >= oa->oa_unit_ids) { - drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); - return -EINVAL; - } - param->oa_unit_id = value; - return 0; -} - -static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->sample = value; - return 0; -} - -static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->metric_set = value; - return 0; -} - -static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - int ret = decode_oa_format(oa, value, ¶m->oa_format); - - if (ret) { - drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); - return ret; - } - return 0; -} - -static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, - struct xe_oa_open_param 
*param) -{ -#define OA_EXPONENT_MAX 31 - - if (value > OA_EXPONENT_MAX) { - drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); - return -EINVAL; - } - param->period_exponent = value; - return 0; -} - -static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->disabled = value; - return 0; -} - -static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->exec_queue_id = value; - return 0; -} - -static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->engine_instance = value; - return 0; -} - -static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->no_preempt = value; - return 0; -} - -typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param); -static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { - [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, - [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, - [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, - [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, - [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, - [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, - [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, - [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, - [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, -}; - -static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, - struct xe_oa_open_param *param) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_set_property ext; - int err; - u32 idx; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(oa->xe, ext.property >= 
ARRAY_SIZE(xe_oa_set_property_funcs)) || - XE_IOCTL_DBG(oa->xe, ext.pad)) - return -EINVAL; - - idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs)); - return xe_oa_set_property_funcs[idx](oa, ext.value, param); -} - -typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension, - struct xe_oa_open_param *param); -static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { - [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, -}; - -#define MAX_USER_EXTENSIONS 16 -static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, - struct xe_oa_open_param *param) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_user_extension ext; - int err; - u32 idx; - - if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) - return -E2BIG; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(oa->xe, ext.pad) || - XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) - return -EINVAL; - - idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); - err = xe_oa_user_extension_funcs[idx](oa, extension, param); - if (XE_IOCTL_DBG(oa->xe, err)) - return err; - - if (ext.next_extension) - return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param); - - return 0; -} - -/** - * xe_oa_stream_open_ioctl - Opens an OA stream - * @dev: @drm_device - * @data: pointer to struct @drm_xe_oa_config - * @file: @drm_file - * - * The functions opens an OA stream. 
An OA stream, opened with specified - * properties, enables OA counter samples to be collected, either - * periodically (time based sampling), or on request (using OA queries) - */ -int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_oa *oa = &xe->oa; - struct xe_file *xef = to_xe_file(file); - struct xe_oa_open_param param = {}; - const struct xe_oa_format *f; - bool privileged_op = true; - int ret; - - if (!oa->xe) { - drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); - return -ENODEV; - } - - ret = xe_oa_user_extensions(oa, data, 0, ¶m); - if (ret) - return ret; - - if (param.exec_queue_id > 0) { - param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id); - if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) - return -ENOENT; - - if (param.exec_q->width > 1) - drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n"); - } - - /* - * Query based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC, - * without global stream access, can be an unprivileged operation - */ - if (param.exec_q && !param.sample) - privileged_op = false; - - if (param.no_preempt) { - if (!param.exec_q) { - drm_dbg(&oa->xe->drm, "Preemption disable without exec_q!\n"); - ret = -EINVAL; - goto err_exec_q; - } - privileged_op = true; - } - - if (privileged_op && xe_observation_paranoid && !perfmon_capable()) { - drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe OA stream\n"); - ret = -EACCES; - goto err_exec_q; - } - - if (!param.exec_q && !param.sample) { - drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n"); - ret = -EINVAL; - goto err_exec_q; - } - - ret = xe_oa_assign_hwe(oa, ¶m); - if (ret) - goto err_exec_q; - - f = &oa->oa_formats[param.oa_format]; - if (!param.oa_format || !f->size || - !engine_supports_oa_format(param.hwe, f->type)) { - drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", - param.oa_format, f->type, 
f->size, param.hwe->class); - ret = -EINVAL; - goto err_exec_q; - } - - if (param.period_exponent > 0) { - u64 oa_period, oa_freq_hz; - - /* Requesting samples from OAG buffer is a privileged operation */ - if (!param.sample) { - drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n"); - ret = -EINVAL; - goto err_exec_q; - } - oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent); - oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period); - drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); - } - - mutex_lock(¶m.hwe->gt->oa.gt_lock); - ret = xe_oa_stream_open_ioctl_locked(oa, ¶m); - mutex_unlock(¶m.hwe->gt->oa.gt_lock); -err_exec_q: - if (ret < 0 && param.exec_q) - xe_exec_queue_put(param.exec_q); - return ret; -} - -static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr) -{ - static const struct xe_reg flex_eu_regs[] = { - EU_PERF_CNTL0, - EU_PERF_CNTL1, - EU_PERF_CNTL2, - EU_PERF_CNTL3, - EU_PERF_CNTL4, - EU_PERF_CNTL5, - EU_PERF_CNTL6, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { - if (flex_eu_regs[i].addr == addr) - return true; - } - return false; -} - -static bool xe_oa_reg_in_range_table(u32 addr, const struct xe_mmio_range *table) -{ - while (table->start && table->end) { - if (addr >= table->start && addr <= table->end) - return true; - - table++; - } - - return false; -} - -static const struct xe_mmio_range xehp_oa_b_counters[] = { - { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */ - { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ - {} -}; - -static const struct xe_mmio_range gen12_oa_b_counters[] = { - { .start = 0x2b2c, .end = 0x2b2c }, /* OAG_OA_PESS */ - { .start = 0xd900, .end = 0xd91c }, /* OAG_OASTARTTRIG[1-8] */ - { .start = 0xd920, .end = 0xd93c }, /* OAG_OAREPORTTRIG1[1-8] */ - { .start = 0xd940, .end = 0xd97c }, /* OAG_CEC[0-7][0-1] */ - { .start = 0xdc00, .end = 0xdc3c }, /* OAG_SCEC[0-7][0-1] */ - { .start = 0xdc40, .end = 0xdc40 }, /* 
OAG_SPCTR_CNF */ - { .start = 0xdc44, .end = 0xdc44 }, /* OAA_DBG_REG */ - {} -}; - -static const struct xe_mmio_range mtl_oam_b_counters[] = { - { .start = 0x393000, .end = 0x39301c }, /* OAM_STARTTRIG1[1-8] */ - { .start = 0x393020, .end = 0x39303c }, /* OAM_REPORTTRIG1[1-8] */ - { .start = 0x393040, .end = 0x39307c }, /* OAM_CEC[0-7][0-1] */ - { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */ - {} -}; - -static const struct xe_mmio_range xe2_oa_b_counters[] = { - { .start = 0x393200, .end = 0x39323C }, /* MPES_0_MPES_SAG - MPES_7_UPPER_MPES_SAG */ - { .start = 0x394200, .end = 0x39423C }, /* MPES_0_MPES_SCMI0 - MPES_7_UPPER_MPES_SCMI0 */ - { .start = 0x394A00, .end = 0x394A3C }, /* MPES_0_MPES_SCMI1 - MPES_7_UPPER_MPES_SCMI1 */ - {}, -}; - -static bool xe_oa_is_valid_b_counter_addr(struct xe_oa *oa, u32 addr) -{ - return xe_oa_reg_in_range_table(addr, xehp_oa_b_counters) || - xe_oa_reg_in_range_table(addr, gen12_oa_b_counters) || - xe_oa_reg_in_range_table(addr, mtl_oam_b_counters) || - (GRAPHICS_VER(oa->xe) >= 20 && - xe_oa_reg_in_range_table(addr, xe2_oa_b_counters)); -} - -static const struct xe_mmio_range mtl_oa_mux_regs[] = { - { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ - { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ - { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ - { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ - { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */ - {} -}; - -static const struct xe_mmio_range gen12_oa_mux_regs[] = { - { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ - { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ - { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ - { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ - { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */ - {} -}; - -static const struct xe_mmio_range xe2_oa_mux_regs[] = { - { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ - { .start = 0x8704, .end = 0x8704 }, 
/* LMEM_LAT_MEASURE_MCFG_GRP */ - { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ - { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ - { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ - { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ - {}, -}; - -static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr) -{ - if (GRAPHICS_VER(oa->xe) >= 20) - return xe_oa_reg_in_range_table(addr, xe2_oa_mux_regs); - else if (GRAPHICS_VERx100(oa->xe) >= 1270) - return xe_oa_reg_in_range_table(addr, mtl_oa_mux_regs); - else - return xe_oa_reg_in_range_table(addr, gen12_oa_mux_regs); -} - -static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr) -{ - return xe_oa_is_valid_flex_addr(oa, addr) || - xe_oa_is_valid_b_counter_addr(oa, addr) || - xe_oa_is_valid_mux_addr(oa, addr); -} - -static struct xe_oa_reg * -xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr), - u32 __user *regs, u32 n_regs) -{ - struct xe_oa_reg *oa_regs; - int err; - u32 i; - - oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL); - if (!oa_regs) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < n_regs; i++) { - u32 addr, value; - - err = get_user(addr, regs); - if (err) - goto addr_err; - - if (!is_valid(oa, addr)) { - drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr); - err = -EINVAL; - goto addr_err; - } - - err = get_user(value, regs + 1); - if (err) - goto addr_err; - - oa_regs[i].addr = XE_REG(addr); - oa_regs[i].value = value; - - regs += 2; - } - - return oa_regs; - -addr_err: - kfree(oa_regs); - return ERR_PTR(err); -} - -static ssize_t show_dynamic_id(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - struct xe_oa_config *oa_config = - container_of(attr, typeof(*oa_config), sysfs_metric_id); - - return sysfs_emit(buf, "%d\n", oa_config->id); -} - -static int create_dynamic_oa_sysfs_entry(struct xe_oa *oa, - struct xe_oa_config *oa_config) -{ - 
sysfs_attr_init(&oa_config->sysfs_metric_id.attr); - oa_config->sysfs_metric_id.attr.name = "id"; - oa_config->sysfs_metric_id.attr.mode = 0444; - oa_config->sysfs_metric_id.show = show_dynamic_id; - oa_config->sysfs_metric_id.store = NULL; - - oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; - oa_config->attrs[1] = NULL; - - oa_config->sysfs_metric.name = oa_config->uuid; - oa_config->sysfs_metric.attrs = oa_config->attrs; - - return sysfs_create_group(oa->metrics_kobj, &oa_config->sysfs_metric); -} - -/** - * xe_oa_add_config_ioctl - Adds one OA config - * @dev: @drm_device - * @data: pointer to struct @drm_xe_oa_config - * @file: @drm_file - * - * The functions adds an OA config to the set of OA configs maintained in - * the kernel. The config determines which OA metrics are collected for an - * OA stream. - */ -int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_oa *oa = &xe->oa; - struct drm_xe_oa_config param; - struct drm_xe_oa_config *arg = ¶m; - struct xe_oa_config *oa_config, *tmp; - struct xe_oa_reg *regs; - int err, id; - - if (!oa->xe) { - drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); - return -ENODEV; - } - - if (xe_observation_paranoid && !perfmon_capable()) { - drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n"); - return -EACCES; - } - - err = __copy_from_user(¶m, u64_to_user_ptr(data), sizeof(param)); - if (XE_IOCTL_DBG(oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(oa->xe, arg->extensions) || - XE_IOCTL_DBG(oa->xe, !arg->regs_ptr) || - XE_IOCTL_DBG(oa->xe, !arg->n_regs)) - return -EINVAL; - - oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); - if (!oa_config) - return -ENOMEM; - - oa_config->oa = oa; - kref_init(&oa_config->ref); - - if (!uuid_is_valid(arg->uuid)) { - drm_dbg(&oa->xe->drm, "Invalid uuid format for OA config\n"); - err = -EINVAL; - goto reg_err; - } - - /* Last character in 
oa_config->uuid will be 0 because oa_config is kzalloc */ - memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid)); - - oa_config->regs_len = arg->n_regs; - regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg_addr, - u64_to_user_ptr(arg->regs_ptr), - arg->n_regs); - if (IS_ERR(regs)) { - drm_dbg(&oa->xe->drm, "Failed to create OA config for mux_regs\n"); - err = PTR_ERR(regs); - goto reg_err; - } - oa_config->regs = regs; - - err = mutex_lock_interruptible(&oa->metrics_lock); - if (err) - goto reg_err; - - /* We shouldn't have too many configs, so this iteration shouldn't be too costly */ - idr_for_each_entry(&oa->metrics_idr, tmp, id) { - if (!strcmp(tmp->uuid, oa_config->uuid)) { - drm_dbg(&oa->xe->drm, "OA config already exists with this uuid\n"); - err = -EADDRINUSE; - goto sysfs_err; - } - } - - err = create_dynamic_oa_sysfs_entry(oa, oa_config); - if (err) { - drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); - goto sysfs_err; - } - - oa_config->id = idr_alloc(&oa->metrics_idr, oa_config, 1, 0, GFP_KERNEL); - if (oa_config->id < 0) { - drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); - err = oa_config->id; - goto sysfs_err; - } - - mutex_unlock(&oa->metrics_lock); - - drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id); - - return oa_config->id; - -sysfs_err: - mutex_unlock(&oa->metrics_lock); -reg_err: - xe_oa_config_put(oa_config); - drm_dbg(&oa->xe->drm, "Failed to add new OA config\n"); - return err; -} - -/** - * xe_oa_remove_config_ioctl - Removes one OA config - * @dev: @drm_device - * @data: pointer to struct @drm_xe_observation_param - * @file: @drm_file - */ -int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_oa *oa = &xe->oa; - struct xe_oa_config *oa_config; - u64 arg, *ptr = u64_to_user_ptr(data); - int ret; - - if (!oa->xe) { - drm_dbg(&xe->drm, "xe oa interface not available for 
this system\n"); - return -ENODEV; - } - - if (xe_observation_paranoid && !perfmon_capable()) { - drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n"); - return -EACCES; - } - - ret = get_user(arg, ptr); - if (XE_IOCTL_DBG(oa->xe, ret)) - return ret; - - ret = mutex_lock_interruptible(&oa->metrics_lock); - if (ret) - return ret; - - oa_config = idr_find(&oa->metrics_idr, arg); - if (!oa_config) { - drm_dbg(&oa->xe->drm, "Failed to remove unknown OA config\n"); - ret = -ENOENT; - goto err_unlock; - } - - WARN_ON(arg != oa_config->id); - - sysfs_remove_group(oa->metrics_kobj, &oa_config->sysfs_metric); - idr_remove(&oa->metrics_idr, arg); - - mutex_unlock(&oa->metrics_lock); - - drm_dbg(&oa->xe->drm, "Removed config %s id=%i\n", oa_config->uuid, oa_config->id); - - xe_oa_config_put(oa_config); - - return 0; - -err_unlock: - mutex_unlock(&oa->metrics_lock); - return ret; -} - -/** - * xe_oa_register - Xe OA registration - * @xe: @xe_device - * - * Exposes the metrics sysfs directory upon completion of module initialization - */ -void xe_oa_register(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - - if (!oa->xe) - return; - - oa->metrics_kobj = kobject_create_and_add("metrics", - &xe->drm.primary->kdev->kobj); -} - -/** - * xe_oa_unregister - Xe OA de-registration - * @xe: @xe_device - */ -void xe_oa_unregister(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - - if (!oa->metrics_kobj) - return; - - kobject_put(oa->metrics_kobj); - oa->metrics_kobj = NULL; -} - -static u32 num_oa_units_per_gt(struct xe_gt *gt) -{ - return 1; -} - -static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) -{ - if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { - /* - * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices - * within the gt use the same OAM. 
All MTL/LNL SKUs list 1 SA MEDIA - */ - xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); - - return 0; - } - - return XE_OA_UNIT_INVALID; -} - -static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) -{ - switch (hwe->class) { - case XE_ENGINE_CLASS_RENDER: - case XE_ENGINE_CLASS_COMPUTE: - return 0; - - case XE_ENGINE_CLASS_VIDEO_DECODE: - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - return __hwe_oam_unit(hwe); - - default: - return XE_OA_UNIT_INVALID; - } -} - -static struct xe_oa_regs __oam_regs(u32 base) -{ - return (struct xe_oa_regs) { - base, - OAM_HEAD_POINTER(base), - OAM_TAIL_POINTER(base), - OAM_BUFFER(base), - OAM_CONTEXT_CONTROL(base), - OAM_CONTROL(base), - OAM_DEBUG(base), - OAM_STATUS(base), - OAM_CONTROL_COUNTER_SEL_MASK, - }; -} - -static struct xe_oa_regs __oag_regs(void) -{ - return (struct xe_oa_regs) { - 0, - OAG_OAHEADPTR, - OAG_OATAILPTR, - OAG_OABUFFER, - OAG_OAGLBCTXCTRL, - OAG_OACONTROL, - OAG_OA_DEBUG, - OAG_OASTATUS, - OAG_OACONTROL_OA_COUNTER_SEL_MASK, - }; -} - -static void __xe_oa_init_oa_units(struct xe_gt *gt) -{ - const u32 mtl_oa_base[] = { 0x13000 }; - int i, num_units = gt->oa.num_oa_units; - - for (i = 0; i < num_units; i++) { - struct xe_oa_unit *u = >->oa.oa_unit[i]; - - if (gt->info.type != XE_GT_TYPE_MEDIA) { - u->regs = __oag_regs(); - u->type = DRM_XE_OA_UNIT_TYPE_OAG; - } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { - u->regs = __oam_regs(mtl_oa_base[i]); - u->type = DRM_XE_OA_UNIT_TYPE_OAM; - } - - /* Ensure MMIO trigger remains disabled till there is a stream */ - xe_mmio_write32(>->mmio, u->regs.oa_debug, - oag_configure_mmio_trigger(NULL, false)); - - /* Set oa_unit_ids now to ensure ids remain contiguous */ - u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++; - } -} - -static int xe_oa_init_gt(struct xe_gt *gt) -{ - u32 num_oa_units = num_oa_units_per_gt(gt); - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - struct xe_oa_unit *u; - - u = drmm_kcalloc(>_to_xe(gt)->drm, num_oa_units, sizeof(*u), 
GFP_KERNEL); - if (!u) - return -ENOMEM; - - for_each_hw_engine(hwe, gt, id) { - u32 index = __hwe_oa_unit(hwe); - - hwe->oa_unit = NULL; - if (index < num_oa_units) { - u[index].num_engines++; - hwe->oa_unit = &u[index]; - } - } - - /* - * Fused off engines can result in oa_unit's with num_engines == 0. These units - * will appear in OA unit query, but no OA streams can be opened on them. - */ - gt->oa.num_oa_units = num_oa_units; - gt->oa.oa_unit = u; - - __xe_oa_init_oa_units(gt); - - drmm_mutex_init(>_to_xe(gt)->drm, >->oa.gt_lock); - - return 0; -} - -static int xe_oa_init_oa_units(struct xe_oa *oa) -{ - struct xe_gt *gt; - int i, ret; - - for_each_gt(gt, oa->xe, i) { - ret = xe_oa_init_gt(gt); - if (ret) - return ret; - } - - return 0; -} - -static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) -{ - __set_bit(format, oa->format_mask); -} - -static void xe_oa_init_supported_formats(struct xe_oa *oa) -{ - if (GRAPHICS_VER(oa->xe) >= 20) { - /* Xe2+ */ - oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); - oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); - oa_format_add(oa, XE_OA_FORMAT_PEC64u64); - oa_format_add(oa, XE_OA_FORMAT_PEC64u64_B8_C8); - oa_format_add(oa, XE_OA_FORMAT_PEC64u32); - oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G1); - oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G1); - oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G2); - oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G2); - oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_32_G2_4); - oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_4_G2_32); - } else if (GRAPHICS_VERx100(oa->xe) >= 1270) { - /* XE_METEORLAKE */ - oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); - oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); - oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); - oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); - oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); - oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); - } else if (GRAPHICS_VERx100(oa->xe) >= 1255) { - /* XE_DG2, XE_PVC */ - 
oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); - oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); - oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); - oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); - } else { - /* Gen12+ */ - xe_assert(oa->xe, GRAPHICS_VER(oa->xe) >= 12); - oa_format_add(oa, XE_OA_FORMAT_A12); - oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8); - oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8); - oa_format_add(oa, XE_OA_FORMAT_C4_B8); - } -} - -/** - * xe_oa_init - OA initialization during device probe - * @xe: @xe_device - * - * Return: 0 on success or a negative error code on failure - */ -int xe_oa_init(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - int ret; - - /* Support OA only with GuC submission and Gen12+ */ - if (!xe_device_uc_enabled(xe) || GRAPHICS_VER(xe) < 12) - return 0; - - if (IS_SRIOV_VF(xe)) - return 0; - - oa->xe = xe; - oa->oa_formats = oa_formats; - - drmm_mutex_init(&oa->xe->drm, &oa->metrics_lock); - idr_init_base(&oa->metrics_idr, 1); - - ret = xe_oa_init_oa_units(oa); - if (ret) { - drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret)); - goto exit; - } - - xe_oa_init_supported_formats(oa); - return 0; -exit: - oa->xe = NULL; - return ret; -} - -static int destroy_config(int id, void *p, void *data) -{ - xe_oa_config_put(p); - return 0; -} - -/** - * xe_oa_fini - OA de-initialization during device remove - * @xe: @xe_device - */ -void xe_oa_fini(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - - if (!oa->xe) - return; - - idr_for_each(&oa->metrics_idr, destroy_config, oa); - idr_destroy(&oa->metrics_idr); - - oa->xe = NULL; -} diff --git a/rr-cache/ec396c63eece0f0c56a405443dd6f67970e78a95/preimage b/rr-cache/ec396c63eece0f0c56a405443dd6f67970e78a95/preimage deleted file mode 100644 index 533db4d6893d..000000000000 --- a/rr-cache/ec396c63eece0f0c56a405443dd6f67970e78a95/preimage +++ /dev/null @@ -1,2525 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023-2024 Intel 
Corporation - */ - -#include <linux/anon_inodes.h> -#include <linux/delay.h> -#include <linux/nospec.h> -#include <linux/poll.h> - -#include <drm/drm_drv.h> -#include <drm/drm_managed.h> -#include <uapi/drm/xe_drm.h> - -#include "abi/guc_actions_slpc_abi.h" -#include "instructions/xe_mi_commands.h" -#include "regs/xe_engine_regs.h" -#include "regs/xe_gt_regs.h" -#include "regs/xe_lrc_layout.h" -#include "regs/xe_oa_regs.h" -#include "xe_assert.h" -#include "xe_bb.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_exec_queue.h" -#include "xe_force_wake.h" -#include "xe_gt.h" -#include "xe_gt_mcr.h" -#include "xe_gt_printk.h" -#include "xe_guc_pc.h" -#include "xe_lrc.h" -#include "xe_macros.h" -#include "xe_mmio.h" -#include "xe_oa.h" -#include "xe_observation.h" -#include "xe_pm.h" -#include "xe_sched_job.h" -#include "xe_sriov.h" - -#define DEFAULT_POLL_FREQUENCY_HZ 200 -#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) -#define XE_OA_UNIT_INVALID U32_MAX - -struct xe_oa_reg { - struct xe_reg addr; - u32 value; -}; - -struct xe_oa_config { - struct xe_oa *oa; - - char uuid[UUID_STRING_LEN + 1]; - int id; - - const struct xe_oa_reg *regs; - u32 regs_len; - - struct attribute_group sysfs_metric; - struct attribute *attrs[2]; - struct kobj_attribute sysfs_metric_id; - - struct kref ref; - struct rcu_head rcu; -}; - -struct flex { - struct xe_reg reg; - u32 offset; - u32 value; -}; - -struct xe_oa_open_param { - u32 oa_unit_id; - bool sample; - u32 metric_set; - enum xe_oa_format_name oa_format; - int period_exponent; - bool disabled; - int exec_queue_id; - int engine_instance; - struct xe_exec_queue *exec_q; - struct xe_hw_engine *hwe; - bool no_preempt; -}; - -struct xe_oa_config_bo { - struct llist_node node; - - struct xe_oa_config *oa_config; - struct xe_bb *bb; -}; - -#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x - -static const struct xe_oa_format oa_formats[] = { - [XE_OA_FORMAT_C4_B8] = { 7, 64, DRM_FMT(OAG) }, - 
[XE_OA_FORMAT_A12] = { 0, 64, DRM_FMT(OAG) }, - [XE_OA_FORMAT_A12_B8_C8] = { 2, 128, DRM_FMT(OAG) }, - [XE_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, - [XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAR) }, - [XE_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, - [XE_OAC_FORMAT_A24u64_B8_C8] = { 1, 320, DRM_FMT(OAC), HDR_64_BIT }, - [XE_OAC_FORMAT_A22u32_R2u32_B8_C8] = { 2, 192, DRM_FMT(OAC), HDR_64_BIT }, - [XE_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, DRM_FMT(OAM_MPEC), HDR_64_BIT }, - [XE_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, DRM_FMT(OAM_MPEC), HDR_64_BIT }, - [XE_OA_FORMAT_PEC64u64] = { 1, 576, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, - [XE_OA_FORMAT_PEC64u64_B8_C8] = { 1, 640, DRM_FMT(PEC), HDR_64_BIT, 1, 1 }, - [XE_OA_FORMAT_PEC64u32] = { 1, 320, DRM_FMT(PEC), HDR_64_BIT }, - [XE_OA_FORMAT_PEC32u64_G1] = { 5, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, - [XE_OA_FORMAT_PEC32u32_G1] = { 5, 192, DRM_FMT(PEC), HDR_64_BIT }, - [XE_OA_FORMAT_PEC32u64_G2] = { 6, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, - [XE_OA_FORMAT_PEC32u32_G2] = { 6, 192, DRM_FMT(PEC), HDR_64_BIT }, - [XE_OA_FORMAT_PEC36u64_G1_32_G2_4] = { 3, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, - [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, -}; - -static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head) -{ - return tail >= head ? tail - head : - tail + stream->oa_buffer.circ_size - head; -} - -static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n) -{ - return ptr + n >= stream->oa_buffer.circ_size ? 
- ptr + n - stream->oa_buffer.circ_size : ptr + n; -} - -static void xe_oa_config_release(struct kref *ref) -{ - struct xe_oa_config *oa_config = - container_of(ref, typeof(*oa_config), ref); - - kfree(oa_config->regs); - - kfree_rcu(oa_config, rcu); -} - -static void xe_oa_config_put(struct xe_oa_config *oa_config) -{ - if (!oa_config) - return; - - kref_put(&oa_config->ref, xe_oa_config_release); -} - -static struct xe_oa_config *xe_oa_config_get(struct xe_oa_config *oa_config) -{ - return kref_get_unless_zero(&oa_config->ref) ? oa_config : NULL; -} - -static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_set) -{ - struct xe_oa_config *oa_config; - - rcu_read_lock(); - oa_config = idr_find(&oa->metrics_idr, metrics_set); - if (oa_config) - oa_config = xe_oa_config_get(oa_config); - rcu_read_unlock(); - - return oa_config; -} - -static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo) -{ - xe_oa_config_put(oa_bo->oa_config); - xe_bb_free(oa_bo->bb, NULL); - kfree(oa_bo); -} - -static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) -{ - return &stream->hwe->oa_unit->regs; -} - -static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) -{ - return xe_mmio_read32(&stream->gt->mmio, __oa_regs(stream)->oa_tail_ptr) & - OAG_OATAILPTR_MASK; -} - -#define oa_report_header_64bit(__s) \ - ((__s)->oa_buffer.format->header == HDR_64_BIT) - -static u64 oa_report_id(struct xe_oa_stream *stream, void *report) -{ - return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; -} - -static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) -{ - if (oa_report_header_64bit(stream)) - *(u64 *)report = 0; - else - *report = 0; -} - -static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) -{ - return oa_report_header_64bit(stream) ? 
- *((u64 *)report + 1) : - *((u32 *)report + 1); -} - -static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) -{ - if (oa_report_header_64bit(stream)) - *(u64 *)&report[2] = 0; - else - report[1] = 0; -} - -static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) -{ - u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - int report_size = stream->oa_buffer.format->size; - u32 tail, hw_tail; - unsigned long flags; - bool pollin; - u32 partial_report_size; - - spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - - hw_tail = xe_oa_hw_tail_read(stream); - hw_tail -= gtt_offset; - - /* - * The tail pointer increases in 64 byte (cacheline size), not in report_size - * increments. Also report size may not be a power of 2. Compute potential - * partially landed report in OA buffer. - */ - partial_report_size = xe_oa_circ_diff(stream, hw_tail, stream->oa_buffer.tail); - partial_report_size %= report_size; - - /* Subtract partial amount off the tail */ - hw_tail = xe_oa_circ_diff(stream, hw_tail, partial_report_size); - - tail = hw_tail; - - /* - * Walk the stream backward until we find a report with report id and timestamp - * not 0. We can't tell whether a report has fully landed in memory before the - * report id and timestamp of the following report have landed. - * - * This is assuming that the writes of the OA unit land in memory in the order - * they were written. 
If not : (╯°□°)╯︵ ┻━┻ - */ - while (xe_oa_circ_diff(stream, tail, stream->oa_buffer.tail) >= report_size) { - void *report = stream->oa_buffer.vaddr + tail; - - if (oa_report_id(stream, report) || oa_timestamp(stream, report)) - break; - - tail = xe_oa_circ_diff(stream, tail, report_size); - } - - if (xe_oa_circ_diff(stream, hw_tail, tail) > report_size) - drm_dbg(&stream->oa->xe->drm, - "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", - stream->oa_buffer.head, tail, hw_tail); - - stream->oa_buffer.tail = tail; - - pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail, - stream->oa_buffer.head) >= report_size; - - spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - - return pollin; -} - -static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) -{ - struct xe_oa_stream *stream = - container_of(hrtimer, typeof(*stream), poll_check_timer); - - if (xe_oa_buffer_check_unlocked(stream)) { - stream->pollin = true; - wake_up(&stream->poll_wq); - } - - hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); - - return HRTIMER_RESTART; -} - -static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, - size_t count, size_t *offset, const u8 *report) -{ - int report_size = stream->oa_buffer.format->size; - int report_size_partial; - u8 *oa_buf_end; - - if ((count - *offset) < report_size) - return -ENOSPC; - - buf += *offset; - - oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; - report_size_partial = oa_buf_end - report; - - if (report_size_partial < report_size) { - if (copy_to_user(buf, report, report_size_partial)) - return -EFAULT; - buf += report_size_partial; - - if (copy_to_user(buf, stream->oa_buffer.vaddr, - report_size - report_size_partial)) - return -EFAULT; - } else if (copy_to_user(buf, report, report_size)) { - return -EFAULT; - } - - *offset += report_size; - - return 0; -} - -static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, - size_t count, 
size_t *offset) -{ - int report_size = stream->oa_buffer.format->size; - u8 *oa_buf_base = stream->oa_buffer.vaddr; - u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - size_t start_offset = *offset; - unsigned long flags; - u32 head, tail; - int ret = 0; - - spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - head = stream->oa_buffer.head; - tail = stream->oa_buffer.tail; - spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - - xe_assert(stream->oa->xe, - head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size); - - for (; xe_oa_circ_diff(stream, tail, head); - head = xe_oa_circ_incr(stream, head, report_size)) { - u8 *report = oa_buf_base + head; - - ret = xe_oa_append_report(stream, buf, count, offset, report); - if (ret) - break; - - if (!(stream->oa_buffer.circ_size % report_size)) { - /* Clear out report id and timestamp to detect unlanded reports */ - oa_report_id_clear(stream, (void *)report); - oa_timestamp_clear(stream, (void *)report); - } else { - u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; - u32 part = oa_buf_end - report; - - /* Zero out the entire report */ - if (report_size <= part) { - memset(report, 0, report_size); - } else { - memset(report, 0, part); - memset(oa_buf_base, 0, report_size - part); - } - } - } - - if (start_offset != *offset) { - struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr; - - spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - xe_mmio_write32(&stream->gt->mmio, oaheadptr, - (head + gtt_offset) & OAG_OAHEADPTR_MASK); - stream->oa_buffer.head = head; - spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - } - - return ret; -} - -static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) -{ - struct xe_mmio *mmio = &stream->gt->mmio; - u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; - unsigned long flags; - - spin_lock_irqsave(&stream->oa_buffer.ptr_lock, 
flags); - - xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0); - xe_mmio_write32(mmio, __oa_regs(stream)->oa_head_ptr, - gtt_offset & OAG_OAHEADPTR_MASK); - stream->oa_buffer.head = 0; - /* - * PRM says: "This MMIO must be set before the OATAILPTR register and after the - * OAHEADPTR register. This is to enable proper functionality of the overflow bit". - */ - xe_mmio_write32(mmio, __oa_regs(stream)->oa_buffer, oa_buf); - xe_mmio_write32(mmio, __oa_regs(stream)->oa_tail_ptr, - gtt_offset & OAG_OATAILPTR_MASK); - - /* Mark that we need updated tail pointer to read from */ - stream->oa_buffer.tail = 0; - - spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - - /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ - memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); -} - -static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) -{ - return ((format->counter_select << (ffs(counter_sel_mask) - 1)) & counter_sel_mask) | - REG_FIELD_PREP(OA_OACONTROL_REPORT_BC_MASK, format->bc_report) | - REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size); -} - -static u32 __oa_ccs_select(struct xe_oa_stream *stream) -{ - u32 val; - - if (stream->hwe->class != XE_ENGINE_CLASS_COMPUTE) - return 0; - - val = REG_FIELD_PREP(OAG_OACONTROL_OA_CCS_SELECT_MASK, stream->hwe->instance); - xe_assert(stream->oa->xe, - REG_FIELD_GET(OAG_OACONTROL_OA_CCS_SELECT_MASK, val) == stream->hwe->instance); - return val; -} - -static void xe_oa_enable(struct xe_oa_stream *stream) -{ - const struct xe_oa_format *format = stream->oa_buffer.format; - const struct xe_oa_regs *regs; - u32 val; - - /* - * BSpec: 46822: Bit 0. 
Even if stream->sample is 0, for OAR to function, the OA - * buffer must be correctly initialized - */ - xe_oa_init_oa_buffer(stream); - - regs = __oa_regs(stream); - val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) | - __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; - - if (GRAPHICS_VER(stream->oa->xe) >= 20 && - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) - val |= OAG_OACONTROL_OA_PES_DISAG_EN; - -<<<<<<< - xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val); -======= - xe_mmio_write32(stream->gt, regs->oa_ctrl, val); ->>>>>>> -} - -static void xe_oa_disable(struct xe_oa_stream *stream) -{ - struct xe_mmio *mmio = &stream->gt->mmio; - - xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0); - if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl, - OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) - drm_err(&stream->oa->xe->drm, - "wait for OA to be disabled timed out\n"); - - if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) { - /* <= XE_METEORLAKE except XE_PVC */ - xe_mmio_write32(mmio, OA_TLB_INV_CR, 1); - if (xe_mmio_wait32(mmio, OA_TLB_INV_CR, 1, 0, 50000, NULL, false)) - drm_err(&stream->oa->xe->drm, - "wait for OA tlb invalidate timed out\n"); - } -} - -static int xe_oa_wait_unlocked(struct xe_oa_stream *stream) -{ - /* We might wait indefinitely if periodic sampling is not enabled */ - if (!stream->periodic) - return -EINVAL; - - return wait_event_interruptible(stream->poll_wq, - xe_oa_buffer_check_unlocked(stream)); -} - -#define OASTATUS_RELEVANT_BITS (OASTATUS_MMIO_TRG_Q_FULL | OASTATUS_COUNTER_OVERFLOW | \ - OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST) - -static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf, - size_t count, size_t *offset) -{ - /* Only clear our bits to avoid side-effects */ - stream->oa_status = xe_mmio_rmw32(&stream->gt->mmio, __oa_regs(stream)->oa_status, - OASTATUS_RELEVANT_BITS, 0); - /* - * Signal to userspace that there 
is non-zero OA status to read via - * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl - */ - if (stream->oa_status & OASTATUS_RELEVANT_BITS) - return -EIO; - - return xe_oa_append_reports(stream, buf, count, offset); -} - -static ssize_t xe_oa_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct xe_oa_stream *stream = file->private_data; - size_t offset = 0; - int ret; - - /* Can't read from disabled streams */ - if (!stream->enabled || !stream->sample) - return -EINVAL; - - if (!(file->f_flags & O_NONBLOCK)) { - do { - ret = xe_oa_wait_unlocked(stream); - if (ret) - return ret; - - mutex_lock(&stream->stream_lock); - ret = __xe_oa_read(stream, buf, count, &offset); - mutex_unlock(&stream->stream_lock); - } while (!offset && !ret); - } else { - mutex_lock(&stream->stream_lock); - ret = __xe_oa_read(stream, buf, count, &offset); - mutex_unlock(&stream->stream_lock); - } - - /* - * Typically we clear pollin here in order to wait for the new hrtimer callback - * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC, - * which means that more OA data is available than could fit in the user provided - * buffer. In this case we want the next poll() call to not block. - * - * Also in case of -EIO, we have already waited for data before returning - * -EIO, so need to wait again - */ - if (ret != -ENOSPC && ret != -EIO) - stream->pollin = false; - - /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */ - return offset ?: (ret ?: -EAGAIN); -} - -static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, - struct file *file, poll_table *wait) -{ - __poll_t events = 0; - - poll_wait(file, &stream->poll_wq, wait); - - /* - * We don't explicitly check whether there's something to read here since this - * path may be hot depending on what else userspace is polling, or on the timeout - * in use. 
We rely on hrtimer xe_oa_poll_check_timer_cb to notify us when there - * are samples to read - */ - if (stream->pollin) - events |= EPOLLIN; - - return events; -} - -static __poll_t xe_oa_poll(struct file *file, poll_table *wait) -{ - struct xe_oa_stream *stream = file->private_data; - __poll_t ret; - - mutex_lock(&stream->stream_lock); - ret = xe_oa_poll_locked(stream, file, wait); - mutex_unlock(&stream->stream_lock); - - return ret; -} - -static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) -{ - struct xe_sched_job *job; - struct dma_fence *fence; - long timeout; - int err = 0; - - /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ - job = xe_bb_create_job(stream->k_exec_q, bb); - if (IS_ERR(job)) { - err = PTR_ERR(job); - goto exit; - } - - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); - - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); - if (timeout < 0) - err = timeout; - else if (!timeout) - err = -ETIME; -exit: - return err; -} - -static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs) -{ - u32 i; - -#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) - - for (i = 0; i < n_regs; i++) { - if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { - u32 n_lri = min_t(u32, n_regs - i, - MI_LOAD_REGISTER_IMM_MAX_REGS); - - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(n_lri); - } - bb->cs[bb->len++] = reg_data[i].addr.addr; - bb->cs[bb->len++] = reg_data[i].value; - } -} - -static int num_lri_dwords(int num_regs) -{ - int count = 0; - - if (num_regs > 0) { - count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS); - count += num_regs * 2; - } - - return count; -} - -static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream) -{ - xe_bo_unpin_map_no_vm(stream->oa_buffer.bo); -} - -static void xe_oa_free_configs(struct xe_oa_stream *stream) -{ - struct xe_oa_config_bo *oa_bo, *tmp; - - 
xe_oa_config_put(stream->oa_config); - llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) - free_oa_config_bo(oa_bo); -} - -static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, - struct xe_bb *bb, const struct flex *flex, u32 count) -{ - u32 offset = xe_bo_ggtt_addr(lrc->bo); - - do { - bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); - bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = flex->value; - - } while (flex++, --count); -} - -static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, - const struct flex *flex, u32 count) -{ - struct xe_bb *bb; - int err; - - bb = xe_bb_new(stream->gt, 4 * count, false); - if (IS_ERR(bb)) { - err = PTR_ERR(bb); - goto exit; - } - - xe_oa_store_flex(stream, lrc, bb, flex, count); - - err = xe_oa_submit_bb(stream, bb); - xe_bb_free(bb, NULL); -exit: - return err; -} - -static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) -{ - struct xe_bb *bb; - int err; - - bb = xe_bb_new(stream->gt, 3, false); - if (IS_ERR(bb)) { - err = PTR_ERR(bb); - goto exit; - } - - write_cs_mi_lri(bb, reg_lri, 1); - - err = xe_oa_submit_bb(stream, bb); - xe_bb_free(bb, NULL); -exit: - return err; -} - -static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) -{ - const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); - u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | - (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - - struct flex regs_context[] = { - { - OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, - enable ? 
OA_COUNTER_RESUME : 0, - }, - { - RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) - }, - }; - struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; - int err; - - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, &reg_lri); -} - -static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) -{ - const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); - u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | - (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - struct flex regs_context[] = { - { - OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, - enable ? OA_COUNTER_RESUME : 0, - }, - { - RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, - enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | - _MASKED_FIELD(CTX_CTRL_RUN_ALONE, - enable ? 
CTX_CTRL_RUN_ALONE : 0), - }, - }; - struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; - int err; - - /* Set ccs select to enable programming of OAC_OACONTROL */ - xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl, - __oa_ccs_select(stream)); - - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, &reg_lri); -} - -static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) -{ - switch (stream->hwe->class) { - case XE_ENGINE_CLASS_RENDER: - return xe_oa_configure_oar_context(stream, enable); - case XE_ENGINE_CLASS_COMPUTE: - return xe_oa_configure_oac_context(stream, enable); - default: - /* Video engines do not support MI_REPORT_PERF_COUNT */ - return 0; - } -} - -#define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255) - -static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool enable) -{ - return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_MMIO_TRG, - enable && stream && stream->sample ? - 0 : OAG_OA_DEBUG_DISABLE_MMIO_TRG); -} - -static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) -{ - struct xe_mmio *mmio = &stream->gt->mmio; - u32 sqcnt1; - - /* - * Wa_1508761755:xehpsdv, dg2 - * Enable thread stall DOP gating and EU DOP gating. - */ - if (stream->oa->xe->info.platform == XE_DG2) { - xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, - _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE)); - xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, - _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); - } - - xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, - oag_configure_mmio_trigger(stream, false)); - - /* disable the context save/restore or OAR counters */ - if (stream->exec_q) - xe_oa_configure_oa_context(stream, false); - - /* Make sure we disable noa to save power. 
*/ - xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0); - - sqcnt1 = SQCNT1_PMON_ENABLE | - (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); - - /* Reset PMON Enable to save power. */ - xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0); -} - -static void xe_oa_stream_destroy(struct xe_oa_stream *stream) -{ - struct xe_oa_unit *u = stream->hwe->oa_unit; - struct xe_gt *gt = stream->hwe->gt; - - if (WARN_ON(stream != u->exclusive_stream)) - return; - - WRITE_ONCE(u->exclusive_stream, NULL); - - mutex_destroy(&stream->stream_lock); - - xe_oa_disable_metric_set(stream); - xe_exec_queue_put(stream->k_exec_q); - - xe_oa_free_oa_buffer(stream); - - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_pm_runtime_put(stream->oa->xe); - - /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ - if (stream->override_gucrc) - xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc)); - - xe_oa_free_configs(stream); -} - -static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) -{ - struct xe_bo *bo; - - BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE); - BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M); - - bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, - XE_OA_BUFFER_SIZE, ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); - if (IS_ERR(bo)) - return PTR_ERR(bo); - - stream->oa_buffer.bo = bo; - /* mmap implementation requires OA buffer to be in system memory */ - xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0); - stream->oa_buffer.vaddr = bo->vmap.vaddr; - return 0; -} - -static struct xe_oa_config_bo * -__xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config) -{ - struct xe_oa_config_bo *oa_bo; - size_t config_length; - struct xe_bb *bb; - - oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); - if (!oa_bo) - return ERR_PTR(-ENOMEM); - - config_length = num_lri_dwords(oa_config->regs_len); - config_length = ALIGN(sizeof(u32) * config_length, XE_PAGE_SIZE) 
/ sizeof(u32); - - bb = xe_bb_new(stream->gt, config_length, false); - if (IS_ERR(bb)) - goto err_free; - - write_cs_mi_lri(bb, oa_config->regs, oa_config->regs_len); - - oa_bo->bb = bb; - oa_bo->oa_config = xe_oa_config_get(oa_config); - llist_add(&oa_bo->node, &stream->oa_config_bos); - - return oa_bo; -err_free: - kfree(oa_bo); - return ERR_CAST(bb); -} - -static struct xe_oa_config_bo * -xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config) -{ - struct xe_oa_config_bo *oa_bo; - - /* Look for the buffer in the already allocated BOs attached to the stream */ - llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { - if (oa_bo->oa_config == oa_config && - memcmp(oa_bo->oa_config->uuid, oa_config->uuid, - sizeof(oa_config->uuid)) == 0) - goto out; - } - - oa_bo = __xe_oa_alloc_config_buffer(stream, oa_config); -out: - return oa_bo; -} - -static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config) -{ -#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 - struct xe_oa_config_bo *oa_bo; - int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US; - - oa_bo = xe_oa_alloc_config_buffer(stream, config); - if (IS_ERR(oa_bo)) { - err = PTR_ERR(oa_bo); - goto exit; - } - - err = xe_oa_submit_bb(stream, oa_bo->bb); - - /* Additional empirical delay needed for NOA programming after registers are written */ - usleep_range(us, 2 * us); -exit: - return err; -} - -static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) -{ - /* If user didn't require OA reports, ask HW not to emit ctx switch reports */ - return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS, - stream->sample ? - 0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); -} - -static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) -{ - struct xe_mmio *mmio = &stream->gt->mmio; - u32 oa_debug, sqcnt1; - int ret; - - /* - * Wa_1508761755:xehpsdv, dg2 - * EU NOA signals behave incorrectly if EU clock gating is enabled. 
- * Disable thread stall DOP gating and EU DOP gating. - */ - if (stream->oa->xe->info.platform == XE_DG2) { - xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); - xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DISABLE_DOP_GATING)); - } - - /* Disable clk ratio reports */ - oa_debug = OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | - OAG_OA_DEBUG_INCLUDE_CLK_RATIO; - - if (GRAPHICS_VER(stream->oa->xe) >= 20) - oa_debug |= - /* The three bits below are needed to get PEC counters running */ - OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL | - OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL | - OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL; - - xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, - _MASKED_BIT_ENABLE(oa_debug) | - oag_report_ctx_switches(stream) | - oag_configure_mmio_trigger(stream, true)); - - xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? - (OAG_OAGLBCTXCTRL_COUNTER_RESUME | - OAG_OAGLBCTXCTRL_TIMER_ENABLE | - REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK, - stream->period_exponent)) : 0); - - /* - * Initialize Super Queue Internal Cnt Register - * Set PMON Enable in order to collect valid metrics - * Enable bytes per clock reporting - */ - sqcnt1 = SQCNT1_PMON_ENABLE | - (HAS_OA_BPC_REPORTING(stream->oa->xe) ? 
SQCNT1_OABPC : 0); - - xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1); - - /* Configure OAR/OAC */ - if (stream->exec_q) { - ret = xe_oa_configure_oa_context(stream, true); - if (ret) - return ret; - } - - return xe_oa_emit_oa_config(stream, stream->oa_config); -} - -static void xe_oa_stream_enable(struct xe_oa_stream *stream) -{ - stream->pollin = false; - - xe_oa_enable(stream); - - if (stream->sample) - hrtimer_start(&stream->poll_check_timer, - ns_to_ktime(stream->poll_period_ns), - HRTIMER_MODE_REL_PINNED); -} - -static void xe_oa_stream_disable(struct xe_oa_stream *stream) -{ - xe_oa_disable(stream); - - if (stream->sample) - hrtimer_cancel(&stream->poll_check_timer); -} - -static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream) -{ - struct xe_exec_queue *q = stream->exec_q; - int ret1, ret2; - - /* Best effort recovery: try to revert both to original, irrespective of error */ - ret1 = q->ops->set_timeslice(q, stream->hwe->eclass->sched_props.timeslice_us); - ret2 = q->ops->set_preempt_timeout(q, stream->hwe->eclass->sched_props.preempt_timeout_us); - if (ret1 || ret2) - goto err; - return 0; -err: - drm_dbg(&stream->oa->xe->drm, "%s failed ret1 %d ret2 %d\n", __func__, ret1, ret2); - return ret1 ?: ret2; -} - -static int xe_oa_disable_preempt_timeslice(struct xe_oa_stream *stream) -{ - struct xe_exec_queue *q = stream->exec_q; - int ret; - - /* Setting values to 0 will disable timeslice and preempt_timeout */ - ret = q->ops->set_timeslice(q, 0); - if (ret) - goto err; - - ret = q->ops->set_preempt_timeout(q, 0); - if (ret) - goto err; - - return 0; -err: - xe_oa_enable_preempt_timeslice(stream); - drm_dbg(&stream->oa->xe->drm, "%s failed %d\n", __func__, ret); - return ret; -} - -static int xe_oa_enable_locked(struct xe_oa_stream *stream) -{ - if (stream->enabled) - return 0; - - if (stream->no_preempt) { - int ret = xe_oa_disable_preempt_timeslice(stream); - - if (ret) - return ret; - } - - xe_oa_stream_enable(stream); - - stream->enabled = 
true; - return 0; -} - -static int xe_oa_disable_locked(struct xe_oa_stream *stream) -{ - int ret = 0; - - if (!stream->enabled) - return 0; - - xe_oa_stream_disable(stream); - - if (stream->no_preempt) - ret = xe_oa_enable_preempt_timeslice(stream); - - stream->enabled = false; - return ret; -} - -static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) -{ - struct drm_xe_ext_set_property ext; - long ret = stream->oa_config->id; - struct xe_oa_config *config; - int err; - - err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext)); - if (XE_IOCTL_DBG(stream->oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) || - XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) || - XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) || - XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET)) - return -EINVAL; - - config = xe_oa_get_oa_config(stream->oa, ext.value); - if (!config) - return -ENODEV; - - if (config != stream->oa_config) { - err = xe_oa_emit_oa_config(stream, config); - if (!err) - config = xchg(&stream->oa_config, config); - else - ret = err; - } - - xe_oa_config_put(config); - - return ret; -} - -static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) -{ - struct drm_xe_oa_stream_status status = {}; - void __user *uaddr = (void __user *)arg; - - /* Map from register to uapi bits */ - if (stream->oa_status & OASTATUS_REPORT_LOST) - status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST; - if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW) - status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW; - if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW) - status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW; - if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL) - status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL; - - if (copy_to_user(uaddr, &status, sizeof(status))) - return -EFAULT; - - return 0; -} - -static long xe_oa_info_locked(struct xe_oa_stream 
*stream, unsigned long arg) -{ - struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; - void __user *uaddr = (void __user *)arg; - - if (copy_to_user(uaddr, &info, sizeof(info))) - return -EFAULT; - - return 0; -} - -static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, - unsigned int cmd, - unsigned long arg) -{ - switch (cmd) { - case DRM_XE_OBSERVATION_IOCTL_ENABLE: - return xe_oa_enable_locked(stream); - case DRM_XE_OBSERVATION_IOCTL_DISABLE: - return xe_oa_disable_locked(stream); - case DRM_XE_OBSERVATION_IOCTL_CONFIG: - return xe_oa_config_locked(stream, arg); - case DRM_XE_OBSERVATION_IOCTL_STATUS: - return xe_oa_status_locked(stream, arg); - case DRM_XE_OBSERVATION_IOCTL_INFO: - return xe_oa_info_locked(stream, arg); - } - - return -EINVAL; -} - -static long xe_oa_ioctl(struct file *file, - unsigned int cmd, - unsigned long arg) -{ - struct xe_oa_stream *stream = file->private_data; - long ret; - - mutex_lock(&stream->stream_lock); - ret = xe_oa_ioctl_locked(stream, cmd, arg); - mutex_unlock(&stream->stream_lock); - - return ret; -} - -static void xe_oa_destroy_locked(struct xe_oa_stream *stream) -{ - if (stream->enabled) - xe_oa_disable_locked(stream); - - xe_oa_stream_destroy(stream); - - if (stream->exec_q) - xe_exec_queue_put(stream->exec_q); - - kfree(stream); -} - -static int xe_oa_release(struct inode *inode, struct file *file) -{ - struct xe_oa_stream *stream = file->private_data; - struct xe_gt *gt = stream->gt; - - mutex_lock(&gt->oa.gt_lock); - xe_oa_destroy_locked(stream); - mutex_unlock(&gt->oa.gt_lock); - - /* Release the reference the OA stream kept on the driver */ - drm_dev_put(&gt_to_xe(gt)->drm); - - return 0; -} - -static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct xe_oa_stream *stream = file->private_data; - struct xe_bo *bo = stream->oa_buffer.bo; - unsigned long start = vma->vm_start; - int i, ret; - - if (xe_observation_paranoid && !perfmon_capable()) { - 
drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n"); - return -EACCES; - } - - /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) { - drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); - return -EINVAL; - } - - /* - * Only support VM_READ, enforce MAP_PRIVATE by checking for - * VM_MAYSHARE, don't copy the vma on fork - */ - if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_SHARED | VM_MAYSHARE)) { - drm_dbg(&stream->oa->xe->drm, "mmap must be read only\n"); - return -EINVAL; - } - vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY, - VM_MAYWRITE | VM_MAYEXEC); - - xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma)); - for (i = 0; i < bo->ttm.ttm->num_pages; i++) { - ret = remap_pfn_range(vma, start, page_to_pfn(bo->ttm.ttm->pages[i]), - PAGE_SIZE, vma->vm_page_prot); - if (ret) - break; - - start += PAGE_SIZE; - } - - return ret; -} - -static const struct file_operations xe_oa_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .release = xe_oa_release, - .poll = xe_oa_poll, - .read = xe_oa_read, - .unlocked_ioctl = xe_oa_ioctl, - .mmap = xe_oa_mmap, -}; - -static bool engine_supports_mi_query(struct xe_hw_engine *hwe) -{ - return hwe->class == XE_ENGINE_CLASS_RENDER || - hwe->class == XE_ENGINE_CLASS_COMPUTE; -} - -static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end) -{ - u32 idx = *offset; - u32 len = min(MI_LRI_LEN(state[idx]) + idx, end); - bool found = false; - - idx++; - for (; idx < len; idx += 2) { - if (state[idx] == reg) { - found = true; - break; - } - } - - *offset = idx; - return found; -} - -#define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \ - REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM)) - -static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg) -{ - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 len = 
(xe_gt_lrc_size(stream->gt, stream->hwe->class) + - lrc->ring.size) / sizeof(u32); - u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32); - u32 *state = (u32 *)lrc->bo->vmap.vaddr; - - if (drm_WARN_ON(&stream->oa->xe->drm, !state)) - return U32_MAX; - - for (; offset < len; ) { - if (IS_MI_LRI_CMD(state[offset])) { - /* - * We expect reg-value pairs in MI_LRI command, so - * MI_LRI_LEN() should be even - */ - drm_WARN_ON(&stream->oa->xe->drm, - MI_LRI_LEN(state[offset]) & 0x1); - - if (xe_oa_find_reg_in_lri(state, reg, &offset, len)) - break; - } else { - offset++; - } - } - - return offset < len ? offset : U32_MAX; -} - -static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream) -{ - struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base); - u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class]; - - /* Do this only once. Failure is stored as offset of U32_MAX */ - if (offset) - goto exit; - - offset = xe_oa_context_image_offset(stream, reg.addr); - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset; - - drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n", - stream->hwe->name, offset); -exit: - return offset && offset != U32_MAX ? 
0 : -ENODEV; -} - -static int xe_oa_stream_init(struct xe_oa_stream *stream, - struct xe_oa_open_param *param) -{ - struct xe_oa_unit *u = param->hwe->oa_unit; - struct xe_gt *gt = param->hwe->gt; - int ret; - - stream->exec_q = param->exec_q; - stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS; - stream->hwe = param->hwe; - stream->gt = stream->hwe->gt; - stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; - - stream->sample = param->sample; - stream->periodic = param->period_exponent > 0; - stream->period_exponent = param->period_exponent; - stream->no_preempt = param->no_preempt; - - /* - * For Xe2+, when overrun mode is enabled, there are no partial reports at the end - * of buffer, making the OA buffer effectively a non-power-of-2 size circular - * buffer whose size, circ_size, is a multiple of the report size - */ - if (GRAPHICS_VER(stream->oa->xe) >= 20 && - stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) - stream->oa_buffer.circ_size = - XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size; - else - stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE; - - if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { - /* If we don't find the context offset, just return error */ - ret = xe_oa_set_ctx_ctrl_offset(stream); - if (ret) { - drm_err(&stream->oa->xe->drm, - "xe_oa_set_ctx_ctrl_offset failed for %s\n", - stream->hwe->name); - goto exit; - } - } - - stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set); - if (!stream->oa_config) { - drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n", param->metric_set); - ret = -EINVAL; - goto exit; - } - - /* - * Wa_1509372804:pvc - * - * GuC reset of engines causes OA to lose configuration - * state. Prevent this by overriding GUCRC mode. 
- */ - if (stream->oa->xe->info.platform == XE_PVC) { - ret = xe_guc_pc_override_gucrc_mode(>->uc.guc.pc, - SLPC_GUCRC_MODE_GUCRC_NO_RC6); - if (ret) - goto err_free_configs; - - stream->override_gucrc = true; - } - - /* Take runtime pm ref and forcewake to disable RC6 */ - xe_pm_runtime_get(stream->oa->xe); - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - - ret = xe_oa_alloc_oa_buffer(stream); - if (ret) - goto err_fw_put; - - stream->k_exec_q = xe_exec_queue_create(stream->oa->xe, NULL, - BIT(stream->hwe->logical_instance), 1, - stream->hwe, EXEC_QUEUE_FLAG_KERNEL, 0); - if (IS_ERR(stream->k_exec_q)) { - ret = PTR_ERR(stream->k_exec_q); - drm_err(&stream->oa->xe->drm, "gt%d, hwe %s, xe_exec_queue_create failed=%d", - stream->gt->info.id, stream->hwe->name, ret); - goto err_free_oa_buf; - } - - ret = xe_oa_enable_metric_set(stream); - if (ret) { - drm_dbg(&stream->oa->xe->drm, "Unable to enable metric set\n"); - goto err_put_k_exec_q; - } - - drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n", - stream->oa_config->uuid); - - WRITE_ONCE(u->exclusive_stream, stream); - - hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - stream->poll_check_timer.function = xe_oa_poll_check_timer_cb; - init_waitqueue_head(&stream->poll_wq); - - spin_lock_init(&stream->oa_buffer.ptr_lock); - mutex_init(&stream->stream_lock); - - return 0; - -err_put_k_exec_q: - xe_oa_disable_metric_set(stream); - xe_exec_queue_put(stream->k_exec_q); -err_free_oa_buf: - xe_oa_free_oa_buffer(stream); -err_fw_put: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); - xe_pm_runtime_put(stream->oa->xe); - if (stream->override_gucrc) - xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); -err_free_configs: - xe_oa_free_configs(stream); -exit: - return ret; -} - -static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, - struct xe_oa_open_param *param) -{ - struct xe_oa_stream *stream; - int stream_fd; - int ret; - - /* We 
currently only allow exclusive access */ - if (param->hwe->oa_unit->exclusive_stream) { - drm_dbg(&oa->xe->drm, "OA unit already in use\n"); - ret = -EBUSY; - goto exit; - } - - stream = kzalloc(sizeof(*stream), GFP_KERNEL); - if (!stream) { - ret = -ENOMEM; - goto exit; - } - - stream->oa = oa; - ret = xe_oa_stream_init(stream, param); - if (ret) - goto err_free; - - if (!param->disabled) { - ret = xe_oa_enable_locked(stream); - if (ret) - goto err_destroy; - } - - stream_fd = anon_inode_getfd("[xe_oa]", &xe_oa_fops, stream, 0); - if (stream_fd < 0) { - ret = stream_fd; - goto err_disable; - } - - /* Hold a reference on the drm device till stream_fd is released */ - drm_dev_get(&stream->oa->xe->drm); - - return stream_fd; -err_disable: - if (!param->disabled) - xe_oa_disable_locked(stream); -err_destroy: - xe_oa_stream_destroy(stream); -err_free: - kfree(stream); -exit: - return ret; -} - -/** - * xe_oa_timestamp_frequency - Return OA timestamp frequency - * @gt: @xe_gt - * - * OA timestamp frequency = CS timestamp frequency in most platforms. On some - * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such - * cases, return the adjusted CS timestamp frequency to the user. 
- */ -u32 xe_oa_timestamp_frequency(struct xe_gt *gt) -{ - u32 reg, shift; - - /* - * Wa_18013179988:dg2 - * Wa_14015568240:pvc - * Wa_14015846243:mtl - */ - switch (gt_to_xe(gt)->info.platform) { - case XE_DG2: - case XE_PVC: - case XE_METEORLAKE: - xe_pm_runtime_get(gt_to_xe(gt)); - reg = xe_mmio_read32(>->mmio, RPM_CONFIG0); - xe_pm_runtime_put(gt_to_xe(gt)); - - shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); - return gt->info.reference_clock << (3 - shift); - - default: - return gt->info.reference_clock; - } -} - -static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) -{ - u64 nom = (2ULL << exponent) * NSEC_PER_SEC; - u32 den = xe_oa_timestamp_frequency(gt); - - return div_u64(nom + den - 1, den); -} - -static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) -{ - switch (hwe->oa_unit->type) { - case DRM_XE_OA_UNIT_TYPE_OAG: - return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || - type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; - case DRM_XE_OA_UNIT_TYPE_OAM: - return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; - default: - return false; - } -} - -static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) -{ - u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); - u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); - u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); - u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); - int idx; - - for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { - const struct xe_oa_format *f = &oa->oa_formats[idx]; - - if (counter_size == f->counter_size && bc_report == f->bc_report && - type == f->type && counter_sel == f->counter_select) { - *name = idx; - return 0; - } - } - - return -EINVAL; -} - -/** - * xe_oa_unit_id - Return OA unit ID for a hardware engine - * @hwe: @xe_hw_engine - * - * Return OA unit ID for a hardware 
engine when available - */ -u16 xe_oa_unit_id(struct xe_hw_engine *hwe) -{ - return hwe->oa_unit && hwe->oa_unit->num_engines ? - hwe->oa_unit->oa_unit_id : U16_MAX; -} - -static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) -{ - struct xe_gt *gt; - int i, ret = 0; - - if (param->exec_q) { - /* When we have an exec_q, get hwe from the exec_q */ - param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, - param->engine_instance, true); - } else { - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - - /* Else just get the first hwe attached to the oa unit */ - for_each_gt(gt, oa->xe, i) { - for_each_hw_engine(hwe, gt, id) { - if (xe_oa_unit_id(hwe) == param->oa_unit_id) { - param->hwe = hwe; - goto out; - } - } - } - } -out: - if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) { - drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", - param->exec_q ? param->exec_q->class : -1, - param->engine_instance, param->oa_unit_id); - ret = -EINVAL; - } - - return ret; -} - -static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - if (value >= oa->oa_unit_ids) { - drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); - return -EINVAL; - } - param->oa_unit_id = value; - return 0; -} - -static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->sample = value; - return 0; -} - -static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->metric_set = value; - return 0; -} - -static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - int ret = decode_oa_format(oa, value, ¶m->oa_format); - - if (ret) { - drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); - return ret; - } - return 0; -} - -static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, - struct xe_oa_open_param 
*param) -{ -#define OA_EXPONENT_MAX 31 - - if (value > OA_EXPONENT_MAX) { - drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); - return -EINVAL; - } - param->period_exponent = value; - return 0; -} - -static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->disabled = value; - return 0; -} - -static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->exec_queue_id = value; - return 0; -} - -static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->engine_instance = value; - return 0; -} - -static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->no_preempt = value; - return 0; -} - -typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param); -static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { - [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, - [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, - [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, - [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, - [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, - [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, - [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, - [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, - [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, -}; - -static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, - struct xe_oa_open_param *param) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_set_property ext; - int err; - u32 idx; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(oa->xe, ext.property >= 
ARRAY_SIZE(xe_oa_set_property_funcs)) || - XE_IOCTL_DBG(oa->xe, ext.pad)) - return -EINVAL; - - idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs)); - return xe_oa_set_property_funcs[idx](oa, ext.value, param); -} - -typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension, - struct xe_oa_open_param *param); -static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { - [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, -}; - -#define MAX_USER_EXTENSIONS 16 -static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, - struct xe_oa_open_param *param) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_user_extension ext; - int err; - u32 idx; - - if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) - return -E2BIG; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(oa->xe, ext.pad) || - XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) - return -EINVAL; - - idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); - err = xe_oa_user_extension_funcs[idx](oa, extension, param); - if (XE_IOCTL_DBG(oa->xe, err)) - return err; - - if (ext.next_extension) - return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param); - - return 0; -} - -/** - * xe_oa_stream_open_ioctl - Opens an OA stream - * @dev: @drm_device - * @data: pointer to struct @drm_xe_oa_config - * @file: @drm_file - * - * The functions opens an OA stream. 
An OA stream, opened with specified - * properties, enables OA counter samples to be collected, either - * periodically (time based sampling), or on request (using OA queries) - */ -int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_oa *oa = &xe->oa; - struct xe_file *xef = to_xe_file(file); - struct xe_oa_open_param param = {}; - const struct xe_oa_format *f; - bool privileged_op = true; - int ret; - - if (!oa->xe) { - drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); - return -ENODEV; - } - - ret = xe_oa_user_extensions(oa, data, 0, ¶m); - if (ret) - return ret; - - if (param.exec_queue_id > 0) { - param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id); - if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) - return -ENOENT; - - if (param.exec_q->width > 1) - drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n"); - } - - /* - * Query based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC, - * without global stream access, can be an unprivileged operation - */ - if (param.exec_q && !param.sample) - privileged_op = false; - - if (param.no_preempt) { - if (!param.exec_q) { - drm_dbg(&oa->xe->drm, "Preemption disable without exec_q!\n"); - ret = -EINVAL; - goto err_exec_q; - } - privileged_op = true; - } - - if (privileged_op && xe_observation_paranoid && !perfmon_capable()) { - drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe OA stream\n"); - ret = -EACCES; - goto err_exec_q; - } - - if (!param.exec_q && !param.sample) { - drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n"); - ret = -EINVAL; - goto err_exec_q; - } - - ret = xe_oa_assign_hwe(oa, ¶m); - if (ret) - goto err_exec_q; - - f = &oa->oa_formats[param.oa_format]; - if (!param.oa_format || !f->size || - !engine_supports_oa_format(param.hwe, f->type)) { - drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", - param.oa_format, f->type, 
f->size, param.hwe->class); - ret = -EINVAL; - goto err_exec_q; - } - - if (param.period_exponent > 0) { - u64 oa_period, oa_freq_hz; - - /* Requesting samples from OAG buffer is a privileged operation */ - if (!param.sample) { - drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n"); - ret = -EINVAL; - goto err_exec_q; - } - oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent); - oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period); - drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); - } - - mutex_lock(¶m.hwe->gt->oa.gt_lock); - ret = xe_oa_stream_open_ioctl_locked(oa, ¶m); - mutex_unlock(¶m.hwe->gt->oa.gt_lock); -err_exec_q: - if (ret < 0 && param.exec_q) - xe_exec_queue_put(param.exec_q); - return ret; -} - -static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr) -{ - static const struct xe_reg flex_eu_regs[] = { - EU_PERF_CNTL0, - EU_PERF_CNTL1, - EU_PERF_CNTL2, - EU_PERF_CNTL3, - EU_PERF_CNTL4, - EU_PERF_CNTL5, - EU_PERF_CNTL6, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { - if (flex_eu_regs[i].addr == addr) - return true; - } - return false; -} - -static bool xe_oa_reg_in_range_table(u32 addr, const struct xe_mmio_range *table) -{ - while (table->start && table->end) { - if (addr >= table->start && addr <= table->end) - return true; - - table++; - } - - return false; -} - -static const struct xe_mmio_range xehp_oa_b_counters[] = { - { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */ - { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ - {} -}; - -static const struct xe_mmio_range gen12_oa_b_counters[] = { - { .start = 0x2b2c, .end = 0x2b2c }, /* OAG_OA_PESS */ - { .start = 0xd900, .end = 0xd91c }, /* OAG_OASTARTTRIG[1-8] */ - { .start = 0xd920, .end = 0xd93c }, /* OAG_OAREPORTTRIG1[1-8] */ - { .start = 0xd940, .end = 0xd97c }, /* OAG_CEC[0-7][0-1] */ - { .start = 0xdc00, .end = 0xdc3c }, /* OAG_SCEC[0-7][0-1] */ - { .start = 0xdc40, .end = 0xdc40 }, /* 
OAG_SPCTR_CNF */ - { .start = 0xdc44, .end = 0xdc44 }, /* OAA_DBG_REG */ - {} -}; - -static const struct xe_mmio_range mtl_oam_b_counters[] = { - { .start = 0x393000, .end = 0x39301c }, /* OAM_STARTTRIG1[1-8] */ - { .start = 0x393020, .end = 0x39303c }, /* OAM_REPORTTRIG1[1-8] */ - { .start = 0x393040, .end = 0x39307c }, /* OAM_CEC[0-7][0-1] */ - { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */ - {} -}; - -static const struct xe_mmio_range xe2_oa_b_counters[] = { - { .start = 0x393200, .end = 0x39323C }, /* MPES_0_MPES_SAG - MPES_7_UPPER_MPES_SAG */ - { .start = 0x394200, .end = 0x39423C }, /* MPES_0_MPES_SCMI0 - MPES_7_UPPER_MPES_SCMI0 */ - { .start = 0x394A00, .end = 0x394A3C }, /* MPES_0_MPES_SCMI1 - MPES_7_UPPER_MPES_SCMI1 */ - {}, -}; - -static bool xe_oa_is_valid_b_counter_addr(struct xe_oa *oa, u32 addr) -{ - return xe_oa_reg_in_range_table(addr, xehp_oa_b_counters) || - xe_oa_reg_in_range_table(addr, gen12_oa_b_counters) || - xe_oa_reg_in_range_table(addr, mtl_oam_b_counters) || - (GRAPHICS_VER(oa->xe) >= 20 && - xe_oa_reg_in_range_table(addr, xe2_oa_b_counters)); -} - -static const struct xe_mmio_range mtl_oa_mux_regs[] = { - { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ - { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ - { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ - { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ - { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */ - {} -}; - -static const struct xe_mmio_range gen12_oa_mux_regs[] = { - { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ - { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ - { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ - { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ - { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */ - {} -}; - -static const struct xe_mmio_range xe2_oa_mux_regs[] = { - { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ - { .start = 0x8704, .end = 0x8704 }, 
/* LMEM_LAT_MEASURE_MCFG_GRP */ - { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ - { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ - { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ - { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ - {}, -}; - -static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr) -{ - if (GRAPHICS_VER(oa->xe) >= 20) - return xe_oa_reg_in_range_table(addr, xe2_oa_mux_regs); - else if (GRAPHICS_VERx100(oa->xe) >= 1270) - return xe_oa_reg_in_range_table(addr, mtl_oa_mux_regs); - else - return xe_oa_reg_in_range_table(addr, gen12_oa_mux_regs); -} - -static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr) -{ - return xe_oa_is_valid_flex_addr(oa, addr) || - xe_oa_is_valid_b_counter_addr(oa, addr) || - xe_oa_is_valid_mux_addr(oa, addr); -} - -static struct xe_oa_reg * -xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr), - u32 __user *regs, u32 n_regs) -{ - struct xe_oa_reg *oa_regs; - int err; - u32 i; - - oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL); - if (!oa_regs) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < n_regs; i++) { - u32 addr, value; - - err = get_user(addr, regs); - if (err) - goto addr_err; - - if (!is_valid(oa, addr)) { - drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr); - err = -EINVAL; - goto addr_err; - } - - err = get_user(value, regs + 1); - if (err) - goto addr_err; - - oa_regs[i].addr = XE_REG(addr); - oa_regs[i].value = value; - - regs += 2; - } - - return oa_regs; - -addr_err: - kfree(oa_regs); - return ERR_PTR(err); -} - -static ssize_t show_dynamic_id(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) -{ - struct xe_oa_config *oa_config = - container_of(attr, typeof(*oa_config), sysfs_metric_id); - - return sysfs_emit(buf, "%d\n", oa_config->id); -} - -static int create_dynamic_oa_sysfs_entry(struct xe_oa *oa, - struct xe_oa_config *oa_config) -{ - 
sysfs_attr_init(&oa_config->sysfs_metric_id.attr); - oa_config->sysfs_metric_id.attr.name = "id"; - oa_config->sysfs_metric_id.attr.mode = 0444; - oa_config->sysfs_metric_id.show = show_dynamic_id; - oa_config->sysfs_metric_id.store = NULL; - - oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; - oa_config->attrs[1] = NULL; - - oa_config->sysfs_metric.name = oa_config->uuid; - oa_config->sysfs_metric.attrs = oa_config->attrs; - - return sysfs_create_group(oa->metrics_kobj, &oa_config->sysfs_metric); -} - -/** - * xe_oa_add_config_ioctl - Adds one OA config - * @dev: @drm_device - * @data: pointer to struct @drm_xe_oa_config - * @file: @drm_file - * - * The functions adds an OA config to the set of OA configs maintained in - * the kernel. The config determines which OA metrics are collected for an - * OA stream. - */ -int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_oa *oa = &xe->oa; - struct drm_xe_oa_config param; - struct drm_xe_oa_config *arg = ¶m; - struct xe_oa_config *oa_config, *tmp; - struct xe_oa_reg *regs; - int err, id; - - if (!oa->xe) { - drm_dbg(&xe->drm, "xe oa interface not available for this system\n"); - return -ENODEV; - } - - if (xe_observation_paranoid && !perfmon_capable()) { - drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n"); - return -EACCES; - } - - err = __copy_from_user(¶m, u64_to_user_ptr(data), sizeof(param)); - if (XE_IOCTL_DBG(oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(oa->xe, arg->extensions) || - XE_IOCTL_DBG(oa->xe, !arg->regs_ptr) || - XE_IOCTL_DBG(oa->xe, !arg->n_regs)) - return -EINVAL; - - oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); - if (!oa_config) - return -ENOMEM; - - oa_config->oa = oa; - kref_init(&oa_config->ref); - - if (!uuid_is_valid(arg->uuid)) { - drm_dbg(&oa->xe->drm, "Invalid uuid format for OA config\n"); - err = -EINVAL; - goto reg_err; - } - - /* Last character in 
oa_config->uuid will be 0 because oa_config is kzalloc */ - memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid)); - - oa_config->regs_len = arg->n_regs; - regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg_addr, - u64_to_user_ptr(arg->regs_ptr), - arg->n_regs); - if (IS_ERR(regs)) { - drm_dbg(&oa->xe->drm, "Failed to create OA config for mux_regs\n"); - err = PTR_ERR(regs); - goto reg_err; - } - oa_config->regs = regs; - - err = mutex_lock_interruptible(&oa->metrics_lock); - if (err) - goto reg_err; - - /* We shouldn't have too many configs, so this iteration shouldn't be too costly */ - idr_for_each_entry(&oa->metrics_idr, tmp, id) { - if (!strcmp(tmp->uuid, oa_config->uuid)) { - drm_dbg(&oa->xe->drm, "OA config already exists with this uuid\n"); - err = -EADDRINUSE; - goto sysfs_err; - } - } - - err = create_dynamic_oa_sysfs_entry(oa, oa_config); - if (err) { - drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); - goto sysfs_err; - } - - oa_config->id = idr_alloc(&oa->metrics_idr, oa_config, 1, 0, GFP_KERNEL); - if (oa_config->id < 0) { - drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); - err = oa_config->id; - goto sysfs_err; - } - - mutex_unlock(&oa->metrics_lock); - - drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id); - - return oa_config->id; - -sysfs_err: - mutex_unlock(&oa->metrics_lock); -reg_err: - xe_oa_config_put(oa_config); - drm_dbg(&oa->xe->drm, "Failed to add new OA config\n"); - return err; -} - -/** - * xe_oa_remove_config_ioctl - Removes one OA config - * @dev: @drm_device - * @data: pointer to struct @drm_xe_observation_param - * @file: @drm_file - */ -int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_oa *oa = &xe->oa; - struct xe_oa_config *oa_config; - u64 arg, *ptr = u64_to_user_ptr(data); - int ret; - - if (!oa->xe) { - drm_dbg(&xe->drm, "xe oa interface not available for 
this system\n"); - return -ENODEV; - } - - if (xe_observation_paranoid && !perfmon_capable()) { - drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n"); - return -EACCES; - } - - ret = get_user(arg, ptr); - if (XE_IOCTL_DBG(oa->xe, ret)) - return ret; - - ret = mutex_lock_interruptible(&oa->metrics_lock); - if (ret) - return ret; - - oa_config = idr_find(&oa->metrics_idr, arg); - if (!oa_config) { - drm_dbg(&oa->xe->drm, "Failed to remove unknown OA config\n"); - ret = -ENOENT; - goto err_unlock; - } - - WARN_ON(arg != oa_config->id); - - sysfs_remove_group(oa->metrics_kobj, &oa_config->sysfs_metric); - idr_remove(&oa->metrics_idr, arg); - - mutex_unlock(&oa->metrics_lock); - - drm_dbg(&oa->xe->drm, "Removed config %s id=%i\n", oa_config->uuid, oa_config->id); - - xe_oa_config_put(oa_config); - - return 0; - -err_unlock: - mutex_unlock(&oa->metrics_lock); - return ret; -} - -/** - * xe_oa_register - Xe OA registration - * @xe: @xe_device - * - * Exposes the metrics sysfs directory upon completion of module initialization - */ -void xe_oa_register(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - - if (!oa->xe) - return; - - oa->metrics_kobj = kobject_create_and_add("metrics", - &xe->drm.primary->kdev->kobj); -} - -/** - * xe_oa_unregister - Xe OA de-registration - * @xe: @xe_device - */ -void xe_oa_unregister(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - - if (!oa->metrics_kobj) - return; - - kobject_put(oa->metrics_kobj); - oa->metrics_kobj = NULL; -} - -static u32 num_oa_units_per_gt(struct xe_gt *gt) -{ - return 1; -} - -static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) -{ - if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { - /* - * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices - * within the gt use the same OAM. 
All MTL/LNL SKUs list 1 SA MEDIA - */ - xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); - - return 0; - } - - return XE_OA_UNIT_INVALID; -} - -static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) -{ - switch (hwe->class) { - case XE_ENGINE_CLASS_RENDER: - case XE_ENGINE_CLASS_COMPUTE: - return 0; - - case XE_ENGINE_CLASS_VIDEO_DECODE: - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - return __hwe_oam_unit(hwe); - - default: - return XE_OA_UNIT_INVALID; - } -} - -static struct xe_oa_regs __oam_regs(u32 base) -{ - return (struct xe_oa_regs) { - base, - OAM_HEAD_POINTER(base), - OAM_TAIL_POINTER(base), - OAM_BUFFER(base), - OAM_CONTEXT_CONTROL(base), - OAM_CONTROL(base), - OAM_DEBUG(base), - OAM_STATUS(base), - OAM_CONTROL_COUNTER_SEL_MASK, - }; -} - -static struct xe_oa_regs __oag_regs(void) -{ - return (struct xe_oa_regs) { - 0, - OAG_OAHEADPTR, - OAG_OATAILPTR, - OAG_OABUFFER, - OAG_OAGLBCTXCTRL, - OAG_OACONTROL, - OAG_OA_DEBUG, - OAG_OASTATUS, - OAG_OACONTROL_OA_COUNTER_SEL_MASK, - }; -} - -static void __xe_oa_init_oa_units(struct xe_gt *gt) -{ - const u32 mtl_oa_base[] = { 0x13000 }; - int i, num_units = gt->oa.num_oa_units; - - for (i = 0; i < num_units; i++) { - struct xe_oa_unit *u = >->oa.oa_unit[i]; - - if (gt->info.type != XE_GT_TYPE_MEDIA) { - u->regs = __oag_regs(); - u->type = DRM_XE_OA_UNIT_TYPE_OAG; - } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { - u->regs = __oam_regs(mtl_oa_base[i]); - u->type = DRM_XE_OA_UNIT_TYPE_OAM; - } - - /* Ensure MMIO trigger remains disabled till there is a stream */ - xe_mmio_write32(>->mmio, u->regs.oa_debug, - oag_configure_mmio_trigger(NULL, false)); - - /* Set oa_unit_ids now to ensure ids remain contiguous */ - u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++; - } -} - -static int xe_oa_init_gt(struct xe_gt *gt) -{ - u32 num_oa_units = num_oa_units_per_gt(gt); - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - struct xe_oa_unit *u; - - u = drmm_kcalloc(>_to_xe(gt)->drm, num_oa_units, sizeof(*u), 
GFP_KERNEL); - if (!u) - return -ENOMEM; - - for_each_hw_engine(hwe, gt, id) { - u32 index = __hwe_oa_unit(hwe); - - hwe->oa_unit = NULL; - if (index < num_oa_units) { - u[index].num_engines++; - hwe->oa_unit = &u[index]; - } - } - - /* - * Fused off engines can result in oa_unit's with num_engines == 0. These units - * will appear in OA unit query, but no OA streams can be opened on them. - */ - gt->oa.num_oa_units = num_oa_units; - gt->oa.oa_unit = u; - - __xe_oa_init_oa_units(gt); - - drmm_mutex_init(>_to_xe(gt)->drm, >->oa.gt_lock); - - return 0; -} - -static int xe_oa_init_oa_units(struct xe_oa *oa) -{ - struct xe_gt *gt; - int i, ret; - - for_each_gt(gt, oa->xe, i) { - ret = xe_oa_init_gt(gt); - if (ret) - return ret; - } - - return 0; -} - -static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) -{ - __set_bit(format, oa->format_mask); -} - -static void xe_oa_init_supported_formats(struct xe_oa *oa) -{ - if (GRAPHICS_VER(oa->xe) >= 20) { - /* Xe2+ */ - oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); - oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); - oa_format_add(oa, XE_OA_FORMAT_PEC64u64); - oa_format_add(oa, XE_OA_FORMAT_PEC64u64_B8_C8); - oa_format_add(oa, XE_OA_FORMAT_PEC64u32); - oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G1); - oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G1); - oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G2); - oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G2); - oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_32_G2_4); - oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_4_G2_32); - } else if (GRAPHICS_VERx100(oa->xe) >= 1270) { - /* XE_METEORLAKE */ - oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); - oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); - oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); - oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); - oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); - oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); - } else if (GRAPHICS_VERx100(oa->xe) >= 1255) { - /* XE_DG2, XE_PVC */ - 
oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); - oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); - oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); - oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); - } else { - /* Gen12+ */ - xe_assert(oa->xe, GRAPHICS_VER(oa->xe) >= 12); - oa_format_add(oa, XE_OA_FORMAT_A12); - oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8); - oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8); - oa_format_add(oa, XE_OA_FORMAT_C4_B8); - } -} - -/** - * xe_oa_init - OA initialization during device probe - * @xe: @xe_device - * - * Return: 0 on success or a negative error code on failure - */ -int xe_oa_init(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - int ret; - - /* Support OA only with GuC submission and Gen12+ */ - if (!xe_device_uc_enabled(xe) || GRAPHICS_VER(xe) < 12) - return 0; - - if (IS_SRIOV_VF(xe)) - return 0; - - oa->xe = xe; - oa->oa_formats = oa_formats; - - drmm_mutex_init(&oa->xe->drm, &oa->metrics_lock); - idr_init_base(&oa->metrics_idr, 1); - - ret = xe_oa_init_oa_units(oa); - if (ret) { - drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret)); - goto exit; - } - - xe_oa_init_supported_formats(oa); - return 0; -exit: - oa->xe = NULL; - return ret; -} - -static int destroy_config(int id, void *p, void *data) -{ - xe_oa_config_put(p); - return 0; -} - -/** - * xe_oa_fini - OA de-initialization during device remove - * @xe: @xe_device - */ -void xe_oa_fini(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - - if (!oa->xe) - return; - - idr_for_each(&oa->metrics_idr, destroy_config, oa); - idr_destroy(&oa->metrics_idr); - - oa->xe = NULL; -} |