diff options
author | Dave Airlie <airlied@redhat.com> | 2024-11-04 12:33:19 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2024-11-04 12:33:19 +1000 |
commit | ec916f3b29ae4a6635c8e33abc715b4ca0ce826b (patch) | |
tree | 42b4d147a9d98b567ee142eb2cdcb2f8becfbfe0 | |
parent | 9f062bb7d342a54c16024c5215c563ca5de91a73 (diff) |
2024y-11m-04d-02h-31m-41s UTC: drm-tip rerere cache update
git version 2.45.2
26 files changed, 0 insertions, 34275 deletions
diff --git a/rr-cache/0117026f4d2ebb01c75d5ab2e99d60ab51530d0c/preimage b/rr-cache/0117026f4d2ebb01c75d5ab2e99d60ab51530d0c/preimage deleted file mode 100644 index 1c95a7a7c99c..000000000000 --- a/rr-cache/0117026f4d2ebb01c75d5ab2e99d60ab51530d0c/preimage +++ /dev/null @@ -1,253 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2018 Intel Corporation - */ - -#include <linux/dmi.h> - -#include "i915_drv.h" -#include "intel_display_types.h" -#include "intel_quirks.h" - -static void intel_set_quirk(struct intel_display *display, enum intel_quirk_id quirk) -{ - display->quirks.mask |= BIT(quirk); -} - -/* - * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason - */ -static void quirk_ssc_force_disable(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_LVDS_SSC_DISABLE); - drm_info(display->drm, "applying lvds SSC disable quirk\n"); -} - -/* - * A machine (e.g. Acer Aspire 5734Z) may need to invert the panel backlight - * brightness value - */ -static void quirk_invert_brightness(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_INVERT_BRIGHTNESS); - drm_info(display->drm, "applying inverted panel brightness quirk\n"); -} - -/* Some VBT's incorrectly indicate no backlight is present */ -static void quirk_backlight_present(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_BACKLIGHT_PRESENT); - drm_info(display->drm, "applying backlight present quirk\n"); -} - -/* Toshiba Satellite P50-C-18C requires T12 delay to be min 800ms - * which is 300 ms greater than eDP spec T12 min. - */ -static void quirk_increase_t12_delay(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_INCREASE_T12_DELAY); - drm_info(display->drm, "Applying T12 delay quirk\n"); -} - -/* - * GeminiLake NUC HDMI outputs require additional off time - * this allows the onboard retimer to correctly sync to signal - */ -static void quirk_increase_ddi_disabled_time(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_INCREASE_DDI_DISABLED_TIME); - drm_info(display->drm, "Applying Increase DDI Disabled quirk\n"); -} - -static void quirk_no_pps_backlight_power_hook(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK); - drm_info(display->drm, "Applying no pps backlight power quirk\n"); -} - -static void quirk_fw_sync_len(struct intel_dp *intel_dp) -{ - struct intel_display *display = to_intel_display(intel_dp); - - intel_set_dpcd_quirk(intel_dp, QUIRK_FW_SYNC_LEN); - drm_info(display->drm, "Applying Fast Wake sync pulse count quirk\n"); -} - -struct intel_quirk { - int device; - int subsystem_vendor; - int subsystem_device; - void (*hook)(struct intel_display *display); -}; - -/* For systems that don't have a meaningful PCI subdevice/subvendor ID */ -struct intel_dmi_quirk { - void (*hook)(struct intel_display *display); - const struct dmi_system_id (*dmi_id_list)[]; -}; - -static int intel_dmi_reverse_brightness(const struct dmi_system_id *id) -{ - DRM_INFO("Backlight polarity reversed on %s\n", id->ident); - return 1; -} - -static int intel_dmi_no_pps_backlight(const struct dmi_system_id *id) -{ - DRM_INFO("No pps backlight support on %s\n", id->ident); - return 1; -} - -static const struct intel_dmi_quirk intel_dmi_quirks[] = { - { - .dmi_id_list = &(const struct dmi_system_id[]) { - { - .callback = intel_dmi_reverse_brightness, - .ident = "NCR Corporation", - .matches = {DMI_MATCH(DMI_SYS_VENDOR, "NCR Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, ""), - }, - }, - { - .callback = intel_dmi_reverse_brightness, - .ident = "Thundersoft TST178 tablet", - /* DMI strings are too generic, also match on BIOS date */ - .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Aptio CRB"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), - DMI_EXACT_MATCH(DMI_BIOS_DATE, "04/15/2014"), - }, - }, - { } /* terminating entry */ - }, - .hook = quirk_invert_brightness, - }, - { - .dmi_id_list = &(const struct dmi_system_id[]) { - { - .callback = intel_dmi_no_pps_backlight, - .ident = "Google Lillipup sku524294", - .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Google"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Lindar"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "sku524294"), - }, - }, - { - .callback = intel_dmi_no_pps_backlight, - .ident = "Google Lillipup sku524295", - .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Google"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Lindar"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "sku524295"), - }, - }, - { } - }, - .hook = quirk_no_pps_backlight_power_hook, - }, -}; - -static struct intel_quirk intel_quirks[] = { - /* Lenovo U160 cannot use SSC on LVDS */ - { 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable }, - - /* Sony Vaio Y cannot use SSC on LVDS */ - { 0x0046, 0x104d, 0x9076, quirk_ssc_force_disable }, - - /* Acer Aspire 5734Z must invert backlight brightness */ - { 0x2a42, 0x1025, 0x0459, quirk_invert_brightness }, - - /* Acer/eMachines G725 */ - { 0x2a42, 0x1025, 0x0210, quirk_invert_brightness }, - - /* Acer/eMachines e725 */ - { 0x2a42, 0x1025, 0x0212, quirk_invert_brightness }, - - /* Acer/Packard Bell NCL20 */ - { 0x2a42, 0x1025, 0x034b, quirk_invert_brightness }, - - /* Acer Aspire 4736Z */ - { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness }, - - /* Acer Aspire 5336 */ - { 0x2a42, 0x1025, 0x048a, quirk_invert_brightness }, - - /* Acer C720 and C720P Chromebooks (Celeron 2955U) have backlights */ - { 0x0a06, 0x1025, 0x0a11, quirk_backlight_present }, - - /* Acer C720 Chromebook (Core i3 4005U) */ - { 0x0a16, 0x1025, 0x0a11, quirk_backlight_present }, - - /* Apple Macbook 2,1 (Core 2 T7400) */ - { 0x27a2, 0x8086, 0x7270, quirk_backlight_present }, - - /* Apple Macbook 4,1 */ - { 0x2a02, 0x106b, 0x00a1, quirk_backlight_present }, - - /* Toshiba CB35 Chromebook (Celeron 2955U) */ - { 0x0a06, 0x1179, 0x0a88, quirk_backlight_present }, - - /* HP Chromebook 14 (Celeron 2955U) */ - { 0x0a06, 0x103c, 0x21ed, quirk_backlight_present }, - - /* Dell Chromebook 11 */ - { 0x0a06, 0x1028, 0x0a35, quirk_backlight_present }, - - /* Dell Chromebook 11 (2015 version) */ - { 0x0a16, 0x1028, 0x0a35, quirk_backlight_present }, - - /* Toshiba Satellite P50-C-18C */ - { 0x191B, 0x1179, 0xF840, quirk_increase_t12_delay }, - - /* GeminiLake NUC */ - { 0x3185, 0x8086, 0x2072, quirk_increase_ddi_disabled_time }, - { 0x3184, 0x8086, 0x2072, quirk_increase_ddi_disabled_time }, - /* ASRock ITX*/ - { 0x3185, 0x1849, 0x2212, quirk_increase_ddi_disabled_time }, - { 0x3184, 0x1849, 0x2212, quirk_increase_ddi_disabled_time }, - /* ECS Liva Q2 */ - { 0x3185, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, - { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, - /* HP Notebook - 14-r206nv */ - { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness }, -}; - -<<<<<<< -======= -static struct intel_dpcd_quirk intel_dpcd_quirks[] = { - /* Dell Precision 5490 */ - { - .device = 0x7d55, - .subsystem_vendor = 0x1028, - .subsystem_device = 0x0cc7, - .sink_oui = SINK_OUI(0x38, 0xec, 0x11), - .hook = quirk_fw_sync_len, - }, - -}; - ->>>>>>> -void intel_init_quirks(struct intel_display *display) -{ - struct pci_dev *d = to_pci_dev(display->drm->dev); - int i; - - for (i = 0; i < ARRAY_SIZE(intel_quirks); i++) { - struct intel_quirk *q = &intel_quirks[i]; - - if (d->device == q->device && - (d->subsystem_vendor == q->subsystem_vendor || - q->subsystem_vendor == PCI_ANY_ID) && - (d->subsystem_device == q->subsystem_device || - q->subsystem_device == PCI_ANY_ID)) - q->hook(display); - } - for (i = 0; i < ARRAY_SIZE(intel_dmi_quirks); i++) { - if (dmi_check_system(*intel_dmi_quirks[i].dmi_id_list) != 0) - intel_dmi_quirks[i].hook(display); - } -} - -bool intel_has_quirk(struct intel_display *display, enum intel_quirk_id quirk) -{ - return display->quirks.mask & BIT(quirk); -} diff --git a/rr-cache/0117026f4d2ebb01c75d5ab2e99d60ab51530d0c/preimage.1 b/rr-cache/0117026f4d2ebb01c75d5ab2e99d60ab51530d0c/preimage.1 deleted file mode 100644 index 1c95a7a7c99c..000000000000 --- a/rr-cache/0117026f4d2ebb01c75d5ab2e99d60ab51530d0c/preimage.1 +++ /dev/null @@ -1,253 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2018 Intel Corporation - */ - -#include <linux/dmi.h> - -#include "i915_drv.h" -#include "intel_display_types.h" -#include "intel_quirks.h" - -static void intel_set_quirk(struct intel_display *display, enum intel_quirk_id quirk) -{ - display->quirks.mask |= BIT(quirk); -} - -/* - * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason - */ -static void quirk_ssc_force_disable(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_LVDS_SSC_DISABLE); - drm_info(display->drm, "applying lvds SSC disable quirk\n"); -} - -/* - * A machine (e.g. Acer Aspire 5734Z) may need to invert the panel backlight - * brightness value - */ -static void quirk_invert_brightness(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_INVERT_BRIGHTNESS); - drm_info(display->drm, "applying inverted panel brightness quirk\n"); -} - -/* Some VBT's incorrectly indicate no backlight is present */ -static void quirk_backlight_present(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_BACKLIGHT_PRESENT); - drm_info(display->drm, "applying backlight present quirk\n"); -} - -/* Toshiba Satellite P50-C-18C requires T12 delay to be min 800ms - * which is 300 ms greater than eDP spec T12 min. - */ -static void quirk_increase_t12_delay(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_INCREASE_T12_DELAY); - drm_info(display->drm, "Applying T12 delay quirk\n"); -} - -/* - * GeminiLake NUC HDMI outputs require additional off time - * this allows the onboard retimer to correctly sync to signal - */ -static void quirk_increase_ddi_disabled_time(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_INCREASE_DDI_DISABLED_TIME); - drm_info(display->drm, "Applying Increase DDI Disabled quirk\n"); -} - -static void quirk_no_pps_backlight_power_hook(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK); - drm_info(display->drm, "Applying no pps backlight power quirk\n"); -} - -static void quirk_fw_sync_len(struct intel_dp *intel_dp) -{ - struct intel_display *display = to_intel_display(intel_dp); - - intel_set_dpcd_quirk(intel_dp, QUIRK_FW_SYNC_LEN); - drm_info(display->drm, "Applying Fast Wake sync pulse count quirk\n"); -} - -struct intel_quirk { - int device; - int subsystem_vendor; - int subsystem_device; - void (*hook)(struct intel_display *display); -}; - -/* For systems that don't have a meaningful PCI subdevice/subvendor ID */ -struct intel_dmi_quirk { - void (*hook)(struct intel_display *display); - const struct dmi_system_id (*dmi_id_list)[]; -}; - -static int intel_dmi_reverse_brightness(const struct dmi_system_id *id) -{ - DRM_INFO("Backlight polarity reversed on %s\n", id->ident); - return 1; -} - -static int intel_dmi_no_pps_backlight(const struct dmi_system_id *id) -{ - DRM_INFO("No pps backlight support on %s\n", id->ident); - return 1; -} - -static const struct intel_dmi_quirk intel_dmi_quirks[] = { - { - .dmi_id_list = &(const struct dmi_system_id[]) { - { - .callback = intel_dmi_reverse_brightness, - .ident = "NCR Corporation", - .matches = {DMI_MATCH(DMI_SYS_VENDOR, "NCR Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, ""), - }, - }, - { - .callback = intel_dmi_reverse_brightness, - .ident = "Thundersoft TST178 tablet", - /* DMI strings are too generic, also match on BIOS date */ - .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Aptio CRB"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), - DMI_EXACT_MATCH(DMI_BIOS_DATE, "04/15/2014"), - }, - }, - { } /* terminating entry */ - }, - .hook = quirk_invert_brightness, - }, - { - .dmi_id_list = &(const struct dmi_system_id[]) { - { - .callback = intel_dmi_no_pps_backlight, - .ident = "Google Lillipup sku524294", - .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Google"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Lindar"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "sku524294"), - }, - }, - { - .callback = intel_dmi_no_pps_backlight, - .ident = "Google Lillipup sku524295", - .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Google"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Lindar"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "sku524295"), - }, - }, - { } - }, - .hook = quirk_no_pps_backlight_power_hook, - }, -}; - -static struct intel_quirk intel_quirks[] = { - /* Lenovo U160 cannot use SSC on LVDS */ - { 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable }, - - /* Sony Vaio Y cannot use SSC on LVDS */ - { 0x0046, 0x104d, 0x9076, quirk_ssc_force_disable }, - - /* Acer Aspire 5734Z must invert backlight brightness */ - { 0x2a42, 0x1025, 0x0459, quirk_invert_brightness }, - - /* Acer/eMachines G725 */ - { 0x2a42, 0x1025, 0x0210, quirk_invert_brightness }, - - /* Acer/eMachines e725 */ - { 0x2a42, 0x1025, 0x0212, quirk_invert_brightness }, - - /* Acer/Packard Bell NCL20 */ - { 0x2a42, 0x1025, 0x034b, quirk_invert_brightness }, - - /* Acer Aspire 4736Z */ - { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness }, - - /* Acer Aspire 5336 */ - { 0x2a42, 0x1025, 0x048a, quirk_invert_brightness }, - - /* Acer C720 and C720P Chromebooks (Celeron 2955U) have backlights */ - { 0x0a06, 0x1025, 0x0a11, quirk_backlight_present }, - - /* Acer C720 Chromebook (Core i3 4005U) */ - { 0x0a16, 0x1025, 0x0a11, quirk_backlight_present }, - - /* Apple Macbook 2,1 (Core 2 T7400) */ - { 0x27a2, 0x8086, 0x7270, quirk_backlight_present }, - - /* Apple Macbook 4,1 */ - { 0x2a02, 0x106b, 0x00a1, quirk_backlight_present }, - - /* Toshiba CB35 Chromebook (Celeron 2955U) */ - { 0x0a06, 0x1179, 0x0a88, quirk_backlight_present }, - - /* HP Chromebook 14 (Celeron 2955U) */ - { 0x0a06, 0x103c, 0x21ed, quirk_backlight_present }, - - /* Dell Chromebook 11 */ - { 0x0a06, 0x1028, 0x0a35, quirk_backlight_present }, - - /* Dell Chromebook 11 (2015 version) */ - { 0x0a16, 0x1028, 0x0a35, quirk_backlight_present }, - - /* Toshiba Satellite P50-C-18C */ - { 0x191B, 0x1179, 0xF840, quirk_increase_t12_delay }, - - /* GeminiLake NUC */ - { 0x3185, 0x8086, 0x2072, quirk_increase_ddi_disabled_time }, - { 0x3184, 0x8086, 0x2072, quirk_increase_ddi_disabled_time }, - /* ASRock ITX*/ - { 0x3185, 0x1849, 0x2212, quirk_increase_ddi_disabled_time }, - { 0x3184, 0x1849, 0x2212, quirk_increase_ddi_disabled_time }, - /* ECS Liva Q2 */ - { 0x3185, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, - { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, - /* HP Notebook - 14-r206nv */ - { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness }, -}; - -<<<<<<< -======= -static struct intel_dpcd_quirk intel_dpcd_quirks[] = { - /* Dell Precision 5490 */ - { - .device = 0x7d55, - .subsystem_vendor = 0x1028, - .subsystem_device = 0x0cc7, - .sink_oui = SINK_OUI(0x38, 0xec, 0x11), - .hook = quirk_fw_sync_len, - }, - -}; - ->>>>>>> -void intel_init_quirks(struct intel_display *display) -{ - struct pci_dev *d = to_pci_dev(display->drm->dev); - int i; - - for (i = 0; i < ARRAY_SIZE(intel_quirks); i++) { - struct intel_quirk *q = &intel_quirks[i]; - - if (d->device == q->device && - (d->subsystem_vendor == q->subsystem_vendor || - q->subsystem_vendor == PCI_ANY_ID) && - (d->subsystem_device == q->subsystem_device || - q->subsystem_device == PCI_ANY_ID)) - q->hook(display); - } - for (i = 0; i < ARRAY_SIZE(intel_dmi_quirks); i++) { - if (dmi_check_system(*intel_dmi_quirks[i].dmi_id_list) != 0) - intel_dmi_quirks[i].hook(display); - } -} - -bool intel_has_quirk(struct intel_display *display, enum intel_quirk_id quirk) -{ - return display->quirks.mask & BIT(quirk); -} diff --git a/rr-cache/0492c6e9c9c96d5c29e98e6e44d437de9f979170/preimage b/rr-cache/0492c6e9c9c96d5c29e98e6e44d437de9f979170/preimage deleted file mode 100644 index 2a778618213e..000000000000 --- a/rr-cache/0492c6e9c9c96d5c29e98e6e44d437de9f979170/preimage +++ /dev/null @@ -1,2026 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_exec_queue.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_sched_job.h" -#include "xe_sync.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... - */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? - */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. - * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. - */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - entry->pt_bo->update_index = -1; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. - */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. */ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. - */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. - */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. - */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. - */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. - * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. - */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (!xe_vma_is_userptr(vma) && !xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) - continue; - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - } -} - -static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i; - - for (i = 0; i < num_entries; i++) - kfree(entries[i].pt_entries); -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries, bool bind) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%s: %u entries to update\n", bind ? "bind" : "unbind", - num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) -{ - int i; - - for (i = 0; i < num_syncs; i++) { - struct dma_fence *fence = syncs[i].fence; - - if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &fence->flags)) - return false; - } - - return true; -} - -static int job_test_add_deps(struct xe_sched_job *job, - struct dma_resv *resv, - enum dma_resv_usage usage) -{ - if (!job) { - if (!dma_resv_test_signaled(resv, usage)) - return -ETIME; - - return 0; - } - - return xe_sched_job_add_deps(job, resv, usage); -} - -static int vma_add_deps(struct xe_vma *vma, struct xe_sched_job *job) -{ - struct xe_bo *bo = xe_vma_bo(vma); - - xe_bo_assert_held(bo); - - if (bo && !bo->vm) - return job_test_add_deps(job, bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL); - - return 0; -} - -static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_sched_job *job) -{ - int err = 0; - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = vma_add_deps(op->map.vma, job); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - err = vma_add_deps(op->remap.prev, job); - if (!err && op->remap.next) - err = vma_add_deps(op->remap.next, job); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_vm *vm, - struct xe_vma_ops *vops, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_range_fence_tree *rftree) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - struct xe_vma_op *op; - int err = 0, i; - - xe_vm_assert_held(vm); - - if (!job && !no_in_syncs(vops->syncs, vops->num_syncs)) - return -ETIME; - - if (!job && !xe_exec_queue_is_idle(pt_update_ops->q)) - return -ETIME; - - if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) { - err = job_test_add_deps(job, xe_vm_resv(vm), - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_BOOKKEEP : - DMA_RESV_USAGE_KERNEL); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start, - pt_update_ops->last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, - pt_update_ops->start, - pt_update_ops->last); - } - - list_for_each_entry(op, &vops->list, link) { - err = op_add_deps(vm, op, job); - if (err) - return err; - } - - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); - - for (i = 0; job && !err && i < vops->num_syncs; i++) - err = xe_sync_entry_add_deps(&vops->syncs[i], job); - - return err; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm *vm = vops->vm; - struct xe_range_fence_tree *rftree = &vm->rftree[pt_update->tile_id]; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops, - pt_update_ops, rftree); -} - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - uvma->userptr.divisor = divisor << 1; - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vm_pgtable_update_ops *pt_update) -{ - struct xe_userptr_vma *uvma; - unsigned long notifier_seq; - - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - if (!xe_vma_is_userptr(vma)) - return 0; - - uvma = to_userptr_vma(vma); - notifier_seq = uvma->userptr.notifier_seq; - - if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm)) - return 0; - - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq) && - !xe_pt_userptr_inject_eagain(uvma)) - return 0; - - if (xe_vm_in_fault_mode(vm)) { - return -EAGAIN; - } else { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - - if (xe_vm_in_preempt_fence_mode(vm)) { - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - } - } - - return 0; -} - -static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_vm_pgtable_update_ops *pt_update) -{ - int err = 0; - - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = vma_check_userptr(vm, op->map.vma, pt_update); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - err = vma_check_userptr(vm, op->remap.prev, pt_update); - if (!err && op->remap.next) - err = vma_check_userptr(vm, op->remap.next, pt_update); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), - pt_update); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vm *vm = pt_update->vops->vm; - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - struct xe_vma_op *op; - int err; - - err = xe_pt_pre_commit(pt_update); - if (err) - return err; - - down_read(&vm->userptr.notifier_lock); - - list_for_each_entry(op, &vops->list, link) { - err = op_check_userptr(vm, op, pt_update_ops); - if (err) { - up_read(&vm->userptr.notifier_lock); - break; - } - } - - return err; -} - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. - */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. */ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. - */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vm *vm = pt_update->vops->vm; - u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - struct llist_head *deferred = &pt_update_ops->deferred; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - /* FIXME: Don't commit right away */ - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_bind(vma, pt_op->entries, pt_op->num_entries, - pt_op->rebind, deferred); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - struct llist_head *deferred = &pt_update_ops->deferred; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. - */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - /* FIXME: Don't commit right away */ - xe_pt_commit_unbind(vma, pt_op->entries, pt_op->num_entries, - deferred); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * xe_pt_update_ops_run() - Run PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Run PT update operations which includes committing internal PT state changes, - * creating job for PT update operations for leaf insertion / removal, and - * installing job fence in various places. - * - * Return: fence on success, negative ERR_PTR on error. - */ -struct dma_fence * -xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, - }; - - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - - if (!pt_update_ops->current_op) { - xe_tile_assert(tile, xe_vm_in_fault_mode(vm)); - - return dma_fence_get_stub(); - } - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN) - return ERR_PTR(-ENOSPC); -#endif - - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); -<<<<<<< - if (!ifence) - return ERR_PTR(-ENOMEM); -======= - if (!ifence) { - err = -ENOMEM; - goto kill_vm_tile1; - } ->>>>>>> - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - err = -ENOMEM; - goto free_ifence; - } - - fence = xe_migrate_update_pgtables(tile->migrate, &update); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto free_rfence; - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { -<<<<<<< - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, - vm->usm.asid); - if (err) - goto put_fence; -======= - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); ->>>>>>> - fence = &ifence->base.base; - } - - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence); - - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); - - return fence; - -put_fence: - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); - dma_fence_put(fence); -free_rfence: - kfree(rfence); -free_ifence: - kfree(ifence); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - /* FIXME: Not 100% correct */ - for (i = 0; i < pt_update_ops->num_ops; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - if (pt_op->bind) - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - /* FIXME: Just kill VM for now + cleanup PTs */ - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); - xe_vm_kill(vops->vm, false); -} diff --git a/rr-cache/0492c6e9c9c96d5c29e98e6e44d437de9f979170/preimage.1 b/rr-cache/0492c6e9c9c96d5c29e98e6e44d437de9f979170/preimage.1 deleted file mode 100644 index 2a778618213e..000000000000 --- a/rr-cache/0492c6e9c9c96d5c29e98e6e44d437de9f979170/preimage.1 +++ /dev/null @@ -1,2026 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_exec_queue.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_sched_job.h" -#include "xe_sync.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... - */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? - */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. - * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. - */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - entry->pt_bo->update_index = -1; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. - */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. */ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. - */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. - */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. - */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. - */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. - * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. - */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (!xe_vma_is_userptr(vma) && !xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) - continue; - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - } -} - -static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i; - - for (i = 0; i < num_entries; i++) - kfree(entries[i].pt_entries); -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries, bool bind) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%s: %u entries to update\n", bind ? "bind" : "unbind", - num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) -{ - int i; - - for (i = 0; i < num_syncs; i++) { - struct dma_fence *fence = syncs[i].fence; - - if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &fence->flags)) - return false; - } - - return true; -} - -static int job_test_add_deps(struct xe_sched_job *job, - struct dma_resv *resv, - enum dma_resv_usage usage) -{ - if (!job) { - if (!dma_resv_test_signaled(resv, usage)) - return -ETIME; - - return 0; - } - - return xe_sched_job_add_deps(job, resv, usage); -} - -static int vma_add_deps(struct xe_vma *vma, struct xe_sched_job *job) -{ - struct xe_bo *bo = xe_vma_bo(vma); - - xe_bo_assert_held(bo); - - if (bo && !bo->vm) - return job_test_add_deps(job, bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL); - - return 0; -} - -static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_sched_job *job) -{ - int err = 0; - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = vma_add_deps(op->map.vma, job); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - err = vma_add_deps(op->remap.prev, job); - if (!err && op->remap.next) - err = vma_add_deps(op->remap.next, job); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_vm *vm, - struct xe_vma_ops *vops, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_range_fence_tree *rftree) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - struct xe_vma_op *op; - int err = 0, i; - - xe_vm_assert_held(vm); - - if (!job && !no_in_syncs(vops->syncs, vops->num_syncs)) - return -ETIME; - - if (!job && !xe_exec_queue_is_idle(pt_update_ops->q)) - return -ETIME; - - if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) { - err = job_test_add_deps(job, xe_vm_resv(vm), - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_BOOKKEEP : - DMA_RESV_USAGE_KERNEL); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start, - pt_update_ops->last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, - pt_update_ops->start, - pt_update_ops->last); - } - - list_for_each_entry(op, &vops->list, link) { - err = op_add_deps(vm, op, job); - if (err) - return err; - } - - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); - - for (i = 0; job && !err && i < vops->num_syncs; i++) - err = xe_sync_entry_add_deps(&vops->syncs[i], job); - - return err; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm *vm = vops->vm; - struct xe_range_fence_tree *rftree = &vm->rftree[pt_update->tile_id]; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops, - pt_update_ops, rftree); -} - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - uvma->userptr.divisor = divisor << 1; - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vm_pgtable_update_ops *pt_update) -{ - struct xe_userptr_vma *uvma; - unsigned long notifier_seq; - - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - if (!xe_vma_is_userptr(vma)) - return 0; - - uvma = to_userptr_vma(vma); - notifier_seq = uvma->userptr.notifier_seq; - - if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm)) - return 0; - - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq) && - !xe_pt_userptr_inject_eagain(uvma)) - return 0; - - if (xe_vm_in_fault_mode(vm)) { - return -EAGAIN; - } else { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - - if (xe_vm_in_preempt_fence_mode(vm)) { - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - } - } - - return 0; -} - -static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_vm_pgtable_update_ops *pt_update) -{ - int err = 0; - - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = vma_check_userptr(vm, op->map.vma, pt_update); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - err = vma_check_userptr(vm, op->remap.prev, pt_update); - if (!err && op->remap.next) - err = vma_check_userptr(vm, op->remap.next, pt_update); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), - pt_update); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vm *vm = pt_update->vops->vm; - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - struct xe_vma_op *op; - int err; - - err = xe_pt_pre_commit(pt_update); - if (err) - return err; - - down_read(&vm->userptr.notifier_lock); - - list_for_each_entry(op, &vops->list, link) { - err = op_check_userptr(vm, op, pt_update_ops); - if (err) { - up_read(&vm->userptr.notifier_lock); - break; - } - } - - return err; -} - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. - */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. */ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. - */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vm *vm = pt_update->vops->vm; - u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - struct llist_head *deferred = &pt_update_ops->deferred; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - /* FIXME: Don't commit right away */ - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_bind(vma, pt_op->entries, pt_op->num_entries, - pt_op->rebind, deferred); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - struct llist_head *deferred = &pt_update_ops->deferred; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. - */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - /* FIXME: Don't commit right away */ - xe_pt_commit_unbind(vma, pt_op->entries, pt_op->num_entries, - deferred); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * xe_pt_update_ops_run() - Run PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Run PT update operations which includes committing internal PT state changes, - * creating job for PT update operations for leaf insertion / removal, and - * installing job fence in various places. - * - * Return: fence on success, negative ERR_PTR on error. - */ -struct dma_fence * -xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, - }; - - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - - if (!pt_update_ops->current_op) { - xe_tile_assert(tile, xe_vm_in_fault_mode(vm)); - - return dma_fence_get_stub(); - } - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN) - return ERR_PTR(-ENOSPC); -#endif - - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); -<<<<<<< - if (!ifence) - return ERR_PTR(-ENOMEM); -======= - if (!ifence) { - err = -ENOMEM; - goto kill_vm_tile1; - } ->>>>>>> - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - err = -ENOMEM; - goto free_ifence; - } - - fence = xe_migrate_update_pgtables(tile->migrate, &update); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto free_rfence; - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { -<<<<<<< - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, - vm->usm.asid); - if (err) - goto put_fence; -======= - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); ->>>>>>> - fence = &ifence->base.base; - } - - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence); - - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); - - return fence; - -put_fence: - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); - dma_fence_put(fence); -free_rfence: - kfree(rfence); -free_ifence: - kfree(ifence); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - /* FIXME: Not 100% correct */ - for (i = 0; i < pt_update_ops->num_ops; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - if (pt_op->bind) - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - /* FIXME: Just kill VM for now + cleanup PTs */ - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); - xe_vm_kill(vops->vm, false); -} diff --git a/rr-cache/335956d3d50917a00e263f413df1abe74b56b110/postimage b/rr-cache/335956d3d50917a00e263f413df1abe74b56b110/postimage deleted file mode 100644 index e6fc253cb4b5..000000000000 --- a/rr-cache/335956d3d50917a00e263f413df1abe74b56b110/postimage +++ /dev/null @@ -1,77 +0,0 @@ -# SPDX-License-Identifier: MIT - -config DRM_DISPLAY_DP_AUX_BUS - tristate - depends on DRM - depends on OF || COMPILE_TEST - -config DRM_DISPLAY_HELPER - tristate - depends on DRM - help - DRM helpers for display adapters. - -if DRM_DISPLAY_HELPER - -config DRM_DISPLAY_DP_AUX_CEC - bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support" - select DRM_DISPLAY_DP_HELPER - select CEC_CORE - help - Choose this option if you want to enable HDMI CEC support for - DisplayPort/USB-C to HDMI adapters. - - Note: not all adapters support this feature, and even for those - that do support this they often do not hook up the CEC pin. - -config DRM_DISPLAY_DP_AUX_CHARDEV - bool "DRM DP AUX Interface" - select DRM_DISPLAY_DP_HELPER - help - Choose this option to enable a /dev/drm_dp_auxN node that allows to - read and write values to arbitrary DPCD registers on the DP aux - channel. - -config DRM_DISPLAY_DP_HELPER - bool - help - DRM display helpers for DisplayPort. - -config DRM_DISPLAY_DP_TUNNEL - bool - select DRM_DISPLAY_DP_HELPER - help - Enable support for DisplayPort tunnels. This allows drivers to use - DP tunnel features like the Bandwidth Allocation mode to maximize the - BW utilization for display streams on Thunderbolt links. - -config DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG - bool "Enable debugging the DP tunnel state" - depends on REF_TRACKER - depends on DRM_DISPLAY_DP_TUNNEL - depends on DEBUG_KERNEL - depends on EXPERT - help - Enables debugging the DP tunnel manager's state, including the - consistency of all managed tunnels' reference counting and the state of - streams contained in tunnels. - - If in doubt, say "N". - -config DRM_DISPLAY_HDCP_HELPER - bool - help - DRM display helpers for HDCP. - -config DRM_DISPLAY_HDMI_HELPER - bool - help - DRM display helpers for HDMI. - -config DRM_DISPLAY_HDMI_STATE_HELPER - bool - select DRM_DISPLAY_HDMI_HELPER - help - DRM KMS state helpers for HDMI. - -endif # DRM_DISPLAY_HELPER diff --git a/rr-cache/335956d3d50917a00e263f413df1abe74b56b110/preimage b/rr-cache/335956d3d50917a00e263f413df1abe74b56b110/preimage deleted file mode 100644 index 5070b374a59b..000000000000 --- a/rr-cache/335956d3d50917a00e263f413df1abe74b56b110/preimage +++ /dev/null @@ -1,85 +0,0 @@ -# SPDX-License-Identifier: MIT - -config DRM_DISPLAY_DP_AUX_BUS - tristate - depends on DRM - depends on OF || COMPILE_TEST - -config DRM_DISPLAY_HELPER - tristate - depends on DRM - help - DRM helpers for display adapters. - -if DRM_DISPLAY_HELPER - -config DRM_DISPLAY_DP_AUX_CEC - bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support" - select DRM_DISPLAY_DP_HELPER - select CEC_CORE - help - Choose this option if you want to enable HDMI CEC support for - DisplayPort/USB-C to HDMI adapters. - - Note: not all adapters support this feature, and even for those - that do support this they often do not hook up the CEC pin. - -config DRM_DISPLAY_DP_AUX_CHARDEV - bool "DRM DP AUX Interface" - select DRM_DISPLAY_DP_HELPER - help - Choose this option to enable a /dev/drm_dp_auxN node that allows to - read and write values to arbitrary DPCD registers on the DP aux - channel. - -config DRM_DISPLAY_DP_HELPER - bool - help - DRM display helpers for DisplayPort. - -config DRM_DISPLAY_DP_TUNNEL - bool - select DRM_DISPLAY_DP_HELPER - help - Enable support for DisplayPort tunnels. This allows drivers to use - DP tunnel features like the Bandwidth Allocation mode to maximize the - BW utilization for display streams on Thunderbolt links. - -config DRM_DISPLAY_DP_TUNNEL_STATE_DEBUG - bool "Enable debugging the DP tunnel state" - depends on REF_TRACKER - depends on DRM_DISPLAY_DP_TUNNEL - depends on DEBUG_KERNEL - depends on EXPERT - help - Enables debugging the DP tunnel manager's state, including the - consistency of all managed tunnels' reference counting and the state of - streams contained in tunnels. - - If in doubt, say "N". - -<<<<<<< -======= -config DRM_DISPLAY_DSC_HELPER - bool - help - DRM display helpers for VESA DSC (used by DSI and DisplayPort). - ->>>>>>> -config DRM_DISPLAY_HDCP_HELPER - bool - help - DRM display helpers for HDCP. - -config DRM_DISPLAY_HDMI_HELPER - bool - help - DRM display helpers for HDMI. - -config DRM_DISPLAY_HDMI_STATE_HELPER - bool - select DRM_DISPLAY_HDMI_HELPER - help - DRM KMS state helpers for HDMI. - -endif # DRM_DISPLAY_HELPER diff --git a/rr-cache/41549dd6cc337627199acbe6749c5685c7e927d3/preimage.3 b/rr-cache/41549dd6cc337627199acbe6749c5685c7e927d3/preimage.3 deleted file mode 100644 index 232d29e9a561..000000000000 --- a/rr-cache/41549dd6cc337627199acbe6749c5685c7e927d3/preimage.3 +++ /dev/null @@ -1,2234 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include <linux/dma-fence-chain.h> - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... - */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? - */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. - * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. - */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. - */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. */ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. - */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. - */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. - */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. - */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. - * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. - */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (xe_vma_is_userptr(vma)) - lockdep_assert_held_read(&vm->userptr.notifier_lock); - else if (!xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) { - kfree(entries[i].pt_entries); - continue; - } - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - kfree(entries[i].pt_entries); - } -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%u entries to update\n", num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - uvma->userptr.divisor = divisor << 1; - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -/** - * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks - * @base: Base we derive from. - * @bind: Whether this is a bind or an unbind operation. A bind operation - * makes the pre-commit callback error with -EAGAIN if it detects a - * pending invalidation. - * @locked: Whether the pre-commit callback locked the userptr notifier lock - * and it needs unlocking. - */ -struct xe_pt_migrate_pt_update { - struct xe_migrate_pt_update base; - bool bind; - bool locked; -}; - -/* - * This function adds the needed dependencies to a page-table update job - * to make sure racing jobs for separate bind engines don't race writing - * to the same page-table range, wreaking havoc. Initially use a single - * fence for the entire VM. An optimization would use smaller granularity. - */ -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_range_fence_tree *rftree, - u64 start, u64 last) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - int err; - - rtfence = xe_range_fence_tree_first(rftree, start, last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, start, last); - } - - return 0; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_range_fence_tree *rftree = - &xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, rftree, - pt_update->start, pt_update->last); -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_pt_migrate_pt_update *userptr_update = - container_of(pt_update, typeof(*userptr_update), base); - struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma); - unsigned long notifier_seq = uvma->userptr.notifier_seq; - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - int err = xe_pt_vm_dependencies(pt_update->job, - &vm->rftree[pt_update->tile_id], - pt_update->start, - pt_update->last); - - if (err) - return err; - - userptr_update->locked = false; - - /* - * Wait until nobody is running the invalidation notifier, and - * since we're exiting the loop holding the notifier lock, - * nobody can proceed invalidating either. - * - * Note that we don't update the vma->userptr.notifier_seq since - * we don't update the userptr pages. - */ - do { - down_read(&vm->userptr.notifier_lock); - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq)) - break; - - up_read(&vm->userptr.notifier_lock); - - if (userptr_update->bind) - return -EAGAIN; - - notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier); - } while (true); - - /* Inject errors to test_whether they are handled correctly */ - if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) { - up_read(&vm->userptr.notifier_lock); - return -EAGAIN; - } - - userptr_update->locked = true; - - return 0; -} - -static const struct xe_migrate_pt_update_ops bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -static void xe_pt_calc_rfence_interval(struct xe_vma *vma, - struct xe_pt_migrate_pt_update *update, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - int i, level = 0; - - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - update->base.start = ALIGN_DOWN(xe_vma_start(vma), - 0x1ull << xe_pt_shift(level)); - update->base.last = ALIGN(xe_vma_end(vma), - 0x1ull << xe_pt_shift(level)) - 1; -} - -/** - * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma - * address range. - * @tile: The tile to bind for. - * @vma: The vma to bind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before binding the built tree to the live vm tree. - * @num_syncs: Number of @sync entries. - * @rebind: Whether we're rebinding this vma to the same address range without - * an unbind in-between. - * - * This function builds a page-table tree (see xe_pt_stage_bind() for more - * information on page-table building), and the xe_vm_pgtable_update entries - * abstracting the operations needed to attach it to the main vm tree. It - * then takes the relevant locks and updates the metadata side of the main - * vm tree and submits the operations for pipelined attachment of the - * gpu page-table to the vm main tree, (which can be done either by the - * cpu and the GPU). - * - * Return: A valid dma-fence representing the pipelined attachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool rebind) -{ - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update bind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops, - .vma = vma, - .tile_id = tile->id, - }, - .bind = true, - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; - struct xe_range_fence *rfence; - int err; - - bind_pt_update.locked = false; - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); - if (err) - goto err; - - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - goto err; - - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries, - num_entries); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. on LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * on !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); - } else if (rebind && !xe_vm_in_lr_mode(vm)) { - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - fence = xe_migrate_update_pgtables(tile->migrate, - vm, xe_vma_bo(vma), q, - entries, num_entries, - syncs, num_syncs, - &bind_pt_update.base); - if (!IS_ERR(fence)) { - bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND; - LLIST_HEAD(deferred); - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - bind_pt_update.base.start, - bind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - - /* TLB invalidation must be done before signaling rebind */ - if (ifence) { - int err = invalidation_fence_init(tile->primary_gt, - ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - } - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || - last_munmap_rebind ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_bind(vma, entries, num_entries, rebind, - bind_pt_update.locked ? &deferred : NULL); - - /* This vma is live (again?) now */ - vma->tile_present |= BIT(tile->id); - - if (bind_pt_update.locked) { - to_userptr_vma(vma)->userptr.initial_bind = true; - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - if (!rebind && last_munmap_rebind && - xe_vm_in_preempt_fence_mode(vm)) - xe_vm_queue_rebind_worker(vm); - } else { - kfree(rfence); - kfree(ifence); - if (bind_pt_update.locked) - up_read(&vm->userptr.notifier_lock); - xe_pt_abort_bind(vma, entries, num_entries); - } - - return fence; - -err: - return ERR_PTR(err); -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. - */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. */ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. - */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vma *vma = pt_update->vma; - u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -<<<<<<< -static const struct xe_migrate_pt_update_ops unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, -======= -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_prepare_bind(vma, pt_op->entries, - pt_op->num_entries, pt_op->rebind); - } else { - xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. - */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence, - struct dma_fence *fence2) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence, - fence2); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence, fence2); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence, fence2); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, ->>>>>>> - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma - * address range. - * @tile: The tile to unbind for. - * @vma: The vma to unbind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before disconnecting the tree to be destroyed. - * @num_syncs: Number of @sync entries. - * - * This function builds a the xe_vm_pgtable_update entries abstracting the - * operations needed to detach the page-table tree to be destroyed from the - * man vm tree. - * It then takes the relevant locks and submits the operations for - * pipelined detachment of the gpu page-table from the vm main tree, - * (which can be done either by the cpu and the GPU), Finally it frees the - * detached page-table tree. - * - * Return: A valid dma-fence representing the pipelined detachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs) -{ -<<<<<<< - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence_chain *chain_fence = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0, i; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, -======= - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update unbind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops : - &unbind_ops, - .vma = vma, - .tile_id = tile->id, - }, ->>>>>>> - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence = NULL; - struct invalidation_fence *ifence; - struct xe_range_fence *rfence; - int err; - - LLIST_HEAD(deferred); - - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - num_entries = xe_pt_stage_unbind(tile, vma, entries); - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, - num_entries); - -<<<<<<< - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - return ERR_PTR(err); - - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); -======= - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) { - err = -ENOMEM; - goto kill_vm_tile1; - } - if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - chain_fence = dma_fence_chain_alloc(); - if (!chain_fence) { - err = -ENOMEM; - goto free_ifence; - } - } - } ->>>>>>> - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - /* - * Even if we were already evicted and unbind to destroy, we need to - * clear again here. The eviction may have updated pagetables at a - * lower level, because it needs to be more conservative. - */ - fence = xe_migrate_update_pgtables(tile->migrate, - vm, NULL, q ? q : - vm->q[tile->id], - entries, num_entries, - syncs, num_syncs, - &unbind_pt_update.base); - if (!IS_ERR(fence)) { - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - unbind_pt_update.base.start, - unbind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - -<<<<<<< - /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, - DMA_RESV_USAGE_BOOKKEEP); - - /* This fence will be installed by caller when doing eviction */ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_unbind(vma, entries, num_entries, - unbind_pt_update.locked ? &deferred : NULL); - vma->tile_present &= ~BIT(tile->id); - } else { - kfree(rfence); - kfree(ifence); - } - - if (!vma->tile_present) - list_del_init(&vma->combined_links.rebind); - - if (unbind_pt_update.locked) { - xe_tile_assert(tile, xe_vma_is_userptr(vma)); -======= - xe_pt_commit(pt_op->vma, pt_op->entries, - pt_op->num_entries, &pt_update_ops->deferred); - pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */ - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - dma_fence_chain_init(chain_fence, &ifence->base.base, - &mfence->base.base, 0); - fence = &chain_fence->base; - } else { - fence = &ifence->base.base; - } - } - - if (!mfence) { - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); - } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); - } ->>>>>>> - - if (!vma->tile_present) { - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - - return fence; -<<<<<<< -======= - -free_rfence: - kfree(rfence); -free_ifence: - dma_fence_chain_free(chain_fence); - kfree(mfence); - kfree(ifence); -kill_vm_tile1: - if (err != -EAGAIN && tile->id) - xe_vm_kill(vops->vm, false); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = 0; i < pt_update_ops->current_op; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = pt_update_ops->num_ops - 1; i >= 0; --i) { - struct xe_vm_pgtable_update_op *pt_op = - &pt_update_ops->ops[i]; - - if (!pt_op->vma || i >= pt_update_ops->current_op) - continue; - - if (pt_op->bind) - xe_pt_abort_bind(pt_op->vma, pt_op->entries, - pt_op->num_entries, - pt_op->rebind); - else - xe_pt_abort_unbind(pt_op->vma, pt_op->entries, - pt_op->num_entries); - } - - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); ->>>>>>> -} diff --git a/rr-cache/4951c0e45d299a9570812ec9f1cc27e11aa21d6e/preimage b/rr-cache/4951c0e45d299a9570812ec9f1cc27e11aa21d6e/preimage deleted file mode 100644 index bb6eadffff5e..000000000000 --- a/rr-cache/4951c0e45d299a9570812ec9f1cc27e11aa21d6e/preimage +++ /dev/null @@ -1,2248 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include <linux/dma-fence-array.h> - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... - */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? - */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. - * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. - */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. - */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. */ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. - */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. - */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. - */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. - */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. - * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. - */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (xe_vma_is_userptr(vma)) - lockdep_assert_held_read(&vm->userptr.notifier_lock); - else if (!xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) { - kfree(entries[i].pt_entries); - continue; - } - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - kfree(entries[i].pt_entries); - } -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%u entries to update\n", num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - uvma->userptr.divisor = divisor << 1; - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -/** - * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks - * @base: Base we derive from. - * @bind: Whether this is a bind or an unbind operation. A bind operation - * makes the pre-commit callback error with -EAGAIN if it detects a - * pending invalidation. - * @locked: Whether the pre-commit callback locked the userptr notifier lock - * and it needs unlocking. - */ -struct xe_pt_migrate_pt_update { - struct xe_migrate_pt_update base; - bool bind; - bool locked; -}; - -/* - * This function adds the needed dependencies to a page-table update job - * to make sure racing jobs for separate bind engines don't race writing - * to the same page-table range, wreaking havoc. Initially use a single - * fence for the entire VM. An optimization would use smaller granularity. - */ -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_range_fence_tree *rftree, - u64 start, u64 last) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - int err; - - rtfence = xe_range_fence_tree_first(rftree, start, last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, start, last); - } - - return 0; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_range_fence_tree *rftree = - &xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, rftree, - pt_update->start, pt_update->last); -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_pt_migrate_pt_update *userptr_update = - container_of(pt_update, typeof(*userptr_update), base); - struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma); - unsigned long notifier_seq = uvma->userptr.notifier_seq; - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - int err = xe_pt_vm_dependencies(pt_update->job, - &vm->rftree[pt_update->tile_id], - pt_update->start, - pt_update->last); - - if (err) - return err; - - userptr_update->locked = false; - - /* - * Wait until nobody is running the invalidation notifier, and - * since we're exiting the loop holding the notifier lock, - * nobody can proceed invalidating either. - * - * Note that we don't update the vma->userptr.notifier_seq since - * we don't update the userptr pages. - */ - do { - down_read(&vm->userptr.notifier_lock); - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq)) - break; - - up_read(&vm->userptr.notifier_lock); - - if (userptr_update->bind) - return -EAGAIN; - - notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier); - } while (true); - - /* Inject errors to test_whether they are handled correctly */ - if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) { - up_read(&vm->userptr.notifier_lock); - return -EAGAIN; - } - - userptr_update->locked = true; - - return 0; -} - -static const struct xe_migrate_pt_update_ops bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -static void xe_pt_calc_rfence_interval(struct xe_vma *vma, - struct xe_pt_migrate_pt_update *update, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - int i, level = 0; - - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - update->base.start = ALIGN_DOWN(xe_vma_start(vma), - 0x1ull << xe_pt_shift(level)); - update->base.last = ALIGN(xe_vma_end(vma), - 0x1ull << xe_pt_shift(level)) - 1; -} - -/** - * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma - * address range. - * @tile: The tile to bind for. - * @vma: The vma to bind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before binding the built tree to the live vm tree. - * @num_syncs: Number of @sync entries. - * @rebind: Whether we're rebinding this vma to the same address range without - * an unbind in-between. - * - * This function builds a page-table tree (see xe_pt_stage_bind() for more - * information on page-table building), and the xe_vm_pgtable_update entries - * abstracting the operations needed to attach it to the main vm tree. It - * then takes the relevant locks and updates the metadata side of the main - * vm tree and submits the operations for pipelined attachment of the - * gpu page-table to the vm main tree, (which can be done either by the - * cpu and the GPU). - * - * Return: A valid dma-fence representing the pipelined attachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool rebind) -{ - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update bind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops, - .vma = vma, - .tile_id = tile->id, - }, - .bind = true, - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; - struct xe_range_fence *rfence; - int err; - - bind_pt_update.locked = false; - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); - if (err) - goto err; - - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - goto err; - - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries, - num_entries); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. on LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * on !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); - } else if (rebind && !xe_vm_in_lr_mode(vm)) { - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - fence = xe_migrate_update_pgtables(tile->migrate, - vm, xe_vma_bo(vma), q, - entries, num_entries, - syncs, num_syncs, - &bind_pt_update.base); - if (!IS_ERR(fence)) { - bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND; - LLIST_HEAD(deferred); - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - bind_pt_update.base.start, - bind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - - /* TLB invalidation must be done before signaling rebind */ - if (ifence) { - int err = invalidation_fence_init(tile->primary_gt, - ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - } - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || - last_munmap_rebind ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_bind(vma, entries, num_entries, rebind, - bind_pt_update.locked ? &deferred : NULL); - - /* This vma is live (again?) now */ - vma->tile_present |= BIT(tile->id); - - if (bind_pt_update.locked) { - to_userptr_vma(vma)->userptr.initial_bind = true; - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - if (!rebind && last_munmap_rebind && - xe_vm_in_preempt_fence_mode(vm)) - xe_vm_queue_rebind_worker(vm); - } else { - kfree(rfence); - kfree(ifence); - if (bind_pt_update.locked) - up_read(&vm->userptr.notifier_lock); - xe_pt_abort_bind(vma, entries, num_entries); - } - - return fence; - -err: - return ERR_PTR(err); -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. - */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. */ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. - */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vma *vma = pt_update->vma; - u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -<<<<<<< -static const struct xe_migrate_pt_update_ops unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, -======= -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0; - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count << shift); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_prepare_bind(vma, pt_op->entries, - pt_op->num_entries, pt_op->rebind); - } else { - xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. - */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int shift = tile->media_gt ? 1 : 0; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count << shift); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence, - struct dma_fence *fence2) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence, - fence2); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence, fence2); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence, fence2); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, ->>>>>>> - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma - * address range. - * @tile: The tile to unbind for. - * @vma: The vma to unbind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before disconnecting the tree to be destroyed. - * @num_syncs: Number of @sync entries. - * - * This function builds a the xe_vm_pgtable_update entries abstracting the - * operations needed to detach the page-table tree to be destroyed from the - * man vm tree. - * It then takes the relevant locks and submits the operations for - * pipelined detachment of the gpu page-table from the vm main tree, - * (which can be done either by the cpu and the GPU), Finally it frees the - * detached page-table tree. - * - * Return: A valid dma-fence representing the pipelined detachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs) -{ -<<<<<<< - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0, i; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, -======= - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update unbind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops : - &unbind_ops, - .vma = vma, - .tile_id = tile->id, - }, ->>>>>>> - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence = NULL; - struct invalidation_fence *ifence; - struct xe_range_fence *rfence; - int err; - - LLIST_HEAD(deferred); - - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - num_entries = xe_pt_stage_unbind(tile, vma, entries); - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, - num_entries); - -<<<<<<< - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - return ERR_PTR(err); - - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); -======= - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) { - err = -ENOMEM; - goto kill_vm_tile1; - } - if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); - if (!fences) { - err = -ENOMEM; - goto free_ifence; - } - cf = dma_fence_array_alloc(2); - if (!cf) { - err = -ENOMEM; - goto free_ifence; - } - } - } ->>>>>>> - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - /* - * Even if we were already evicted and unbind to destroy, we need to - * clear again here. The eviction may have updated pagetables at a - * lower level, because it needs to be more conservative. - */ - fence = xe_migrate_update_pgtables(tile->migrate, - vm, NULL, q ? q : - vm->q[tile->id], - entries, num_entries, - syncs, num_syncs, - &unbind_pt_update.base); - if (!IS_ERR(fence)) { - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - unbind_pt_update.base.start, - unbind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - -<<<<<<< - /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, - DMA_RESV_USAGE_BOOKKEEP); - - /* This fence will be installed by caller when doing eviction */ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_unbind(vma, entries, num_entries, - unbind_pt_update.locked ? &deferred : NULL); - vma->tile_present &= ~BIT(tile->id); - } else { - kfree(rfence); - kfree(ifence); - } - - if (!vma->tile_present) - list_del_init(&vma->combined_links.rebind); - - if (unbind_pt_update.locked) { - xe_tile_assert(tile, xe_vma_is_userptr(vma)); -======= - xe_pt_commit(pt_op->vma, pt_op->entries, - pt_op->num_entries, &pt_update_ops->deferred); - pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */ - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - fences[0] = &ifence->base.base; - fences[1] = &mfence->base.base; - dma_fence_array_init(cf, 2, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - fence = &cf->base; - } else { - fence = &ifence->base.base; - } - } - - if (!mfence) { - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); - } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); - } ->>>>>>> - - if (!vma->tile_present) { - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - - return fence; -<<<<<<< -======= - -free_rfence: - kfree(rfence); -free_ifence: - kfree(cf); - kfree(fences); - kfree(mfence); - kfree(ifence); -kill_vm_tile1: - if (err != -EAGAIN && tile->id) - xe_vm_kill(vops->vm, false); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = 0; i < pt_update_ops->current_op; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = pt_update_ops->num_ops - 1; i >= 0; --i) { - struct xe_vm_pgtable_update_op *pt_op = - &pt_update_ops->ops[i]; - - if (!pt_op->vma || i >= pt_update_ops->current_op) - continue; - - if (pt_op->bind) - xe_pt_abort_bind(pt_op->vma, pt_op->entries, - pt_op->num_entries, - pt_op->rebind); - else - xe_pt_abort_unbind(pt_op->vma, pt_op->entries, - pt_op->num_entries); - } - - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); ->>>>>>> -} diff --git a/rr-cache/4951c0e45d299a9570812ec9f1cc27e11aa21d6e/preimage.1 b/rr-cache/4951c0e45d299a9570812ec9f1cc27e11aa21d6e/preimage.1 deleted file mode 100644 index bb6eadffff5e..000000000000 --- a/rr-cache/4951c0e45d299a9570812ec9f1cc27e11aa21d6e/preimage.1 +++ /dev/null @@ -1,2248 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include <linux/dma-fence-array.h> - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... - */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? - */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. - * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. - */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. - */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. */ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. - */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. - */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. - */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. - */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. - * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. - */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (xe_vma_is_userptr(vma)) - lockdep_assert_held_read(&vm->userptr.notifier_lock); - else if (!xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) { - kfree(entries[i].pt_entries); - continue; - } - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - kfree(entries[i].pt_entries); - } -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%u entries to update\n", num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static int xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - - uvma->userptr.divisor = divisor << 1; - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -/** - * struct xe_pt_migrate_pt_update - Callback argument for pre-commit callbacks - * @base: Base we derive from. - * @bind: Whether this is a bind or an unbind operation. A bind operation - * makes the pre-commit callback error with -EAGAIN if it detects a - * pending invalidation. - * @locked: Whether the pre-commit callback locked the userptr notifier lock - * and it needs unlocking. - */ -struct xe_pt_migrate_pt_update { - struct xe_migrate_pt_update base; - bool bind; - bool locked; -}; - -/* - * This function adds the needed dependencies to a page-table update job - * to make sure racing jobs for separate bind engines don't race writing - * to the same page-table range, wreaking havoc. Initially use a single - * fence for the entire VM. An optimization would use smaller granularity. - */ -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_range_fence_tree *rftree, - u64 start, u64 last) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - int err; - - rtfence = xe_range_fence_tree_first(rftree, start, last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, start, last); - } - - return 0; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_range_fence_tree *rftree = - &xe_vma_vm(pt_update->vma)->rftree[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, rftree, - pt_update->start, pt_update->last); -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_pt_migrate_pt_update *userptr_update = - container_of(pt_update, typeof(*userptr_update), base); - struct xe_userptr_vma *uvma = to_userptr_vma(pt_update->vma); - unsigned long notifier_seq = uvma->userptr.notifier_seq; - struct xe_vm *vm = xe_vma_vm(&uvma->vma); - int err = xe_pt_vm_dependencies(pt_update->job, - &vm->rftree[pt_update->tile_id], - pt_update->start, - pt_update->last); - - if (err) - return err; - - userptr_update->locked = false; - - /* - * Wait until nobody is running the invalidation notifier, and - * since we're exiting the loop holding the notifier lock, - * nobody can proceed invalidating either. - * - * Note that we don't update the vma->userptr.notifier_seq since - * we don't update the userptr pages. - */ - do { - down_read(&vm->userptr.notifier_lock); - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq)) - break; - - up_read(&vm->userptr.notifier_lock); - - if (userptr_update->bind) - return -EAGAIN; - - notifier_seq = mmu_interval_read_begin(&uvma->userptr.notifier); - } while (true); - - /* Inject errors to test_whether they are handled correctly */ - if (userptr_update->bind && xe_pt_userptr_inject_eagain(uvma)) { - up_read(&vm->userptr.notifier_lock); - return -EAGAIN; - } - - userptr_update->locked = true; - - return 0; -} - -static const struct xe_migrate_pt_update_ops bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_bind_ops = { - .populate = xe_vm_populate_pgtable, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -static void xe_pt_calc_rfence_interval(struct xe_vma *vma, - struct xe_pt_migrate_pt_update *update, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - int i, level = 0; - - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - update->base.start = ALIGN_DOWN(xe_vma_start(vma), - 0x1ull << xe_pt_shift(level)); - update->base.last = ALIGN(xe_vma_end(vma), - 0x1ull << xe_pt_shift(level)) - 1; -} - -/** - * __xe_pt_bind_vma() - Build and connect a page-table tree for the vma - * address range. - * @tile: The tile to bind for. - * @vma: The vma to bind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before binding the built tree to the live vm tree. - * @num_syncs: Number of @sync entries. - * @rebind: Whether we're rebinding this vma to the same address range without - * an unbind in-between. - * - * This function builds a page-table tree (see xe_pt_stage_bind() for more - * information on page-table building), and the xe_vm_pgtable_update entries - * abstracting the operations needed to attach it to the main vm tree. It - * then takes the relevant locks and updates the metadata side of the main - * vm tree and submits the operations for pipelined attachment of the - * gpu page-table to the vm main tree, (which can be done either by the - * cpu and the GPU). - * - * Return: A valid dma-fence representing the pipelined attachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs, - bool rebind) -{ - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update bind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_bind_ops : &bind_ops, - .vma = vma, - .tile_id = tile->id, - }, - .bind = true, - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; - struct xe_range_fence *rfence; - int err; - - bind_pt_update.locked = false; - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - err = xe_pt_prepare_bind(tile, vma, entries, &num_entries); - if (err) - goto err; - - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - goto err; - - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &bind_pt_update, entries, - num_entries); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. on LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * on !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); - } else if (rebind && !xe_vm_in_lr_mode(vm)) { - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - fence = xe_migrate_update_pgtables(tile->migrate, - vm, xe_vma_bo(vma), q, - entries, num_entries, - syncs, num_syncs, - &bind_pt_update.base); - if (!IS_ERR(fence)) { - bool last_munmap_rebind = vma->gpuva.flags & XE_VMA_LAST_REBIND; - LLIST_HEAD(deferred); - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - bind_pt_update.base.start, - bind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - - /* TLB invalidation must be done before signaling rebind */ - if (ifence) { - int err = invalidation_fence_init(tile->primary_gt, - ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - } - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, rebind || - last_munmap_rebind ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_bind(vma, entries, num_entries, rebind, - bind_pt_update.locked ? &deferred : NULL); - - /* This vma is live (again?) now */ - vma->tile_present |= BIT(tile->id); - - if (bind_pt_update.locked) { - to_userptr_vma(vma)->userptr.initial_bind = true; - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - if (!rebind && last_munmap_rebind && - xe_vm_in_preempt_fence_mode(vm)) - xe_vm_queue_rebind_worker(vm); - } else { - kfree(rfence); - kfree(ifence); - if (bind_pt_update.locked) - up_read(&vm->userptr.notifier_lock); - xe_pt_abort_bind(vma, entries, num_entries); - } - - return fence; - -err: - return ERR_PTR(err); -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. - */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. */ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. - */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vma *vma = pt_update->vma; - u64 empty = __xe_pt_empty_pte(tile, xe_vma_vm(vma), update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -<<<<<<< -static const struct xe_migrate_pt_update_ops unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, -======= -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0; - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count << shift); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_prepare_bind(vma, pt_op->entries, - pt_op->num_entries, pt_op->rebind); - } else { - xe_pt_cancel_bind(vma, pt_op->entries, pt_op->num_entries); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. - */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int shift = tile->media_gt ? 1 : 0; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count << shift); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence, - struct dma_fence *fence2) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence, - fence2); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence, fence2); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence, fence2); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, ->>>>>>> - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_unbind_ops = { - .populate = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * __xe_pt_unbind_vma() - Disconnect and free a page-table tree for the vma - * address range. - * @tile: The tile to unbind for. - * @vma: The vma to unbind. - * @q: The exec_queue with which to do pipelined page-table updates. - * @syncs: Entries to sync on before disconnecting the tree to be destroyed. - * @num_syncs: Number of @sync entries. - * - * This function builds a the xe_vm_pgtable_update entries abstracting the - * operations needed to detach the page-table tree to be destroyed from the - * man vm tree. - * It then takes the relevant locks and submits the operations for - * pipelined detachment of the gpu page-table from the vm main tree, - * (which can be done either by the cpu and the GPU), Finally it frees the - * detached page-table tree. - * - * Return: A valid dma-fence representing the pipelined detachment operation - * on success, an error pointer on error. - */ -struct dma_fence * -__xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs) -{ -<<<<<<< - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0, i; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, -======= - struct xe_vm_pgtable_update entries[XE_VM_MAX_LEVEL * 2 + 1]; - struct xe_pt_migrate_pt_update unbind_pt_update = { - .base = { - .ops = xe_vma_is_userptr(vma) ? &userptr_unbind_ops : - &unbind_ops, - .vma = vma, - .tile_id = tile->id, - }, ->>>>>>> - }; - struct xe_vm *vm = xe_vma_vm(vma); - u32 num_entries; - struct dma_fence *fence = NULL; - struct invalidation_fence *ifence; - struct xe_range_fence *rfence; - int err; - - LLIST_HEAD(deferred); - - xe_bo_assert_held(xe_vma_bo(vma)); - xe_vm_assert_held(vm); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx) engine %p.\n", - xe_vma_start(vma), xe_vma_end(vma), q); - - num_entries = xe_pt_stage_unbind(tile, vma, entries); - xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries)); - - xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries); - xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries, - num_entries); - -<<<<<<< - err = dma_resv_reserve_fences(xe_vm_resv(vm), 1); - if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1); - if (err) - return ERR_PTR(err); - - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); -======= - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) { - err = -ENOMEM; - goto kill_vm_tile1; - } - if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); - if (!fences) { - err = -ENOMEM; - goto free_ifence; - } - cf = dma_fence_array_alloc(2); - if (!cf) { - err = -ENOMEM; - goto free_ifence; - } - } - } ->>>>>>> - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - kfree(ifence); - return ERR_PTR(-ENOMEM); - } - - /* - * Even if we were already evicted and unbind to destroy, we need to - * clear again here. The eviction may have updated pagetables at a - * lower level, because it needs to be more conservative. - */ - fence = xe_migrate_update_pgtables(tile->migrate, - vm, NULL, q ? q : - vm->q[tile->id], - entries, num_entries, - syncs, num_syncs, - &unbind_pt_update.base); - if (!IS_ERR(fence)) { - int err; - - err = xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - unbind_pt_update.base.start, - unbind_pt_update.base.last, fence); - if (err) - dma_fence_wait(fence, false); - -<<<<<<< - /* TLB invalidation must be done before signaling unbind */ - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); - if (err) { - dma_fence_put(fence); - kfree(ifence); - return ERR_PTR(err); - } - fence = &ifence->base.base; - - /* add shared fence now for pagetable delayed destroy */ - dma_resv_add_fence(xe_vm_resv(vm), fence, - DMA_RESV_USAGE_BOOKKEEP); - - /* This fence will be installed by caller when doing eviction */ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - DMA_RESV_USAGE_BOOKKEEP); - xe_pt_commit_unbind(vma, entries, num_entries, - unbind_pt_update.locked ? &deferred : NULL); - vma->tile_present &= ~BIT(tile->id); - } else { - kfree(rfence); - kfree(ifence); - } - - if (!vma->tile_present) - list_del_init(&vma->combined_links.rebind); - - if (unbind_pt_update.locked) { - xe_tile_assert(tile, xe_vma_is_userptr(vma)); -======= - xe_pt_commit(pt_op->vma, pt_op->entries, - pt_op->num_entries, &pt_update_ops->deferred); - pt_op->vma = NULL; /* skip in xe_pt_update_ops_abort */ - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - fences[0] = &ifence->base.base; - fences[1] = &mfence->base.base; - dma_fence_array_init(cf, 2, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - fence = &cf->base; - } else { - fence = &ifence->base.base; - } - } - - if (!mfence) { - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); - } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); - } ->>>>>>> - - if (!vma->tile_present) { - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - up_read(&vm->userptr.notifier_lock); - xe_bo_put_commit(&deferred); - } - - return fence; -<<<<<<< -======= - -free_rfence: - kfree(rfence); -free_ifence: - kfree(cf); - kfree(fences); - kfree(mfence); - kfree(ifence); -kill_vm_tile1: - if (err != -EAGAIN && tile->id) - xe_vm_kill(vops->vm, false); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = 0; i < pt_update_ops->current_op; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - for (i = pt_update_ops->num_ops - 1; i >= 0; --i) { - struct xe_vm_pgtable_update_op *pt_op = - &pt_update_ops->ops[i]; - - if (!pt_op->vma || i >= pt_update_ops->current_op) - continue; - - if (pt_op->bind) - xe_pt_abort_bind(pt_op->vma, pt_op->entries, - pt_op->num_entries, - pt_op->rebind); - else - xe_pt_abort_unbind(pt_op->vma, pt_op->entries, - pt_op->num_entries); - } - - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); ->>>>>>> -} diff --git a/rr-cache/5980f28fc8b8b4e17cc44eca73291563199fe0b9/postimage b/rr-cache/5980f28fc8b8b4e17cc44eca73291563199fe0b9/postimage deleted file mode 100644 index 29b56d53a340..000000000000 --- a/rr-cache/5980f28fc8b8b4e17cc44eca73291563199fe0b9/postimage +++ /dev/null @@ -1,298 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2018 Intel Corporation - */ - -#include <linux/dmi.h> - -#include "i915_drv.h" -#include "intel_display_types.h" -#include "intel_quirks.h" - -static void intel_set_quirk(struct intel_display *display, enum intel_quirk_id quirk) -{ - display->quirks.mask |= BIT(quirk); -} - -static void intel_set_dpcd_quirk(struct intel_dp *intel_dp, enum intel_quirk_id quirk) -{ - intel_dp->quirks.mask |= BIT(quirk); -} - -/* - * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason - */ -static void quirk_ssc_force_disable(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_LVDS_SSC_DISABLE); - drm_info(display->drm, "applying lvds SSC disable quirk\n"); -} - -/* - * A machine (e.g. Acer Aspire 5734Z) may need to invert the panel backlight - * brightness value - */ -static void quirk_invert_brightness(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_INVERT_BRIGHTNESS); - drm_info(display->drm, "applying inverted panel brightness quirk\n"); -} - -/* Some VBT's incorrectly indicate no backlight is present */ -static void quirk_backlight_present(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_BACKLIGHT_PRESENT); - drm_info(display->drm, "applying backlight present quirk\n"); -} - -/* Toshiba Satellite P50-C-18C requires T12 delay to be min 800ms - * which is 300 ms greater than eDP spec T12 min. - */ -static void quirk_increase_t12_delay(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_INCREASE_T12_DELAY); - drm_info(display->drm, "Applying T12 delay quirk\n"); -} - -/* - * GeminiLake NUC HDMI outputs require additional off time - * this allows the onboard retimer to correctly sync to signal - */ -static void quirk_increase_ddi_disabled_time(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_INCREASE_DDI_DISABLED_TIME); - drm_info(display->drm, "Applying Increase DDI Disabled quirk\n"); -} - -static void quirk_no_pps_backlight_power_hook(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK); - drm_info(display->drm, "Applying no pps backlight power quirk\n"); -} - -static void quirk_fw_sync_len(struct intel_dp *intel_dp) -{ - struct intel_display *display = to_intel_display(intel_dp); - - intel_set_dpcd_quirk(intel_dp, QUIRK_FW_SYNC_LEN); - drm_info(display->drm, "Applying Fast Wake sync pulse count quirk\n"); -} - -struct intel_quirk { - int device; - int subsystem_vendor; - int subsystem_device; - void (*hook)(struct intel_display *display); -}; - -struct intel_dpcd_quirk { - int device; - int subsystem_vendor; - int subsystem_device; - u8 sink_oui[3]; - u8 sink_device_id[6]; - void (*hook)(struct intel_dp *intel_dp); -}; - -#define SINK_OUI(first, second, third) { (first), (second), (third) } -#define SINK_DEVICE_ID(first, second, third, fourth, fifth, sixth) \ - { (first), (second), (third), (fourth), (fifth), (sixth) } - -#define SINK_DEVICE_ID_ANY SINK_DEVICE_ID(0, 0, 0, 0, 0, 0) - -/* For systems that don't have a meaningful PCI subdevice/subvendor ID */ -struct intel_dmi_quirk { - void (*hook)(struct intel_display *display); - const struct dmi_system_id (*dmi_id_list)[]; -}; - -static int intel_dmi_reverse_brightness(const struct dmi_system_id *id) -{ - DRM_INFO("Backlight polarity reversed on %s\n", id->ident); - return 1; -} - -static int intel_dmi_no_pps_backlight(const struct dmi_system_id *id) -{ - DRM_INFO("No pps backlight support on %s\n", id->ident); - return 1; -} - -static const struct intel_dmi_quirk intel_dmi_quirks[] = { - { - .dmi_id_list = &(const struct dmi_system_id[]) { - { - .callback = intel_dmi_reverse_brightness, - .ident = "NCR Corporation", - .matches = {DMI_MATCH(DMI_SYS_VENDOR, "NCR Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, ""), - }, - }, - { - .callback = intel_dmi_reverse_brightness, - .ident = "Thundersoft TST178 tablet", - /* DMI strings are too generic, also match on BIOS date */ - .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Aptio CRB"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), - DMI_EXACT_MATCH(DMI_BIOS_DATE, "04/15/2014"), - }, - }, - { } /* terminating entry */ - }, - .hook = quirk_invert_brightness, - }, - { - .dmi_id_list = &(const struct dmi_system_id[]) { - { - .callback = intel_dmi_no_pps_backlight, - .ident = "Google Lillipup sku524294", - .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Google"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Lindar"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "sku524294"), - }, - }, - { - .callback = intel_dmi_no_pps_backlight, - .ident = "Google Lillipup sku524295", - .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Google"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Lindar"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "sku524295"), - }, - }, - { } - }, - .hook = quirk_no_pps_backlight_power_hook, - }, -}; - -static struct intel_quirk intel_quirks[] = { - /* Lenovo U160 cannot use SSC on LVDS */ - { 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable }, - - /* Sony Vaio Y cannot use SSC on LVDS */ - { 0x0046, 0x104d, 0x9076, quirk_ssc_force_disable }, - - /* Acer Aspire 5734Z must invert backlight brightness */ - { 0x2a42, 0x1025, 0x0459, quirk_invert_brightness }, - - /* Acer/eMachines G725 */ - { 0x2a42, 0x1025, 0x0210, quirk_invert_brightness }, - - /* Acer/eMachines e725 */ - { 0x2a42, 0x1025, 0x0212, quirk_invert_brightness }, - - /* Acer/Packard Bell NCL20 */ - { 0x2a42, 0x1025, 0x034b, quirk_invert_brightness }, - - /* Acer Aspire 4736Z */ - { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness }, - - /* Acer Aspire 5336 */ - { 0x2a42, 0x1025, 0x048a, quirk_invert_brightness }, - - /* Acer C720 and C720P Chromebooks (Celeron 2955U) have backlights */ - { 0x0a06, 0x1025, 0x0a11, quirk_backlight_present }, - - /* Acer C720 Chromebook (Core i3 4005U) */ - { 0x0a16, 0x1025, 0x0a11, quirk_backlight_present }, - - /* Apple Macbook 2,1 (Core 2 T7400) */ - { 0x27a2, 0x8086, 0x7270, quirk_backlight_present }, - - /* Apple Macbook 4,1 */ - { 0x2a02, 0x106b, 0x00a1, quirk_backlight_present }, - - /* Toshiba CB35 Chromebook (Celeron 2955U) */ - { 0x0a06, 0x1179, 0x0a88, quirk_backlight_present }, - - /* HP Chromebook 14 (Celeron 2955U) */ - { 0x0a06, 0x103c, 0x21ed, quirk_backlight_present }, - - /* Dell Chromebook 11 */ - { 0x0a06, 0x1028, 0x0a35, quirk_backlight_present }, - - /* Dell Chromebook 11 (2015 version) */ - { 0x0a16, 0x1028, 0x0a35, quirk_backlight_present }, - - /* Toshiba Satellite P50-C-18C */ - { 0x191B, 0x1179, 0xF840, quirk_increase_t12_delay }, - - /* GeminiLake NUC */ - { 0x3185, 0x8086, 0x2072, quirk_increase_ddi_disabled_time }, - { 0x3184, 0x8086, 0x2072, quirk_increase_ddi_disabled_time }, - /* ASRock ITX*/ - { 0x3185, 0x1849, 0x2212, quirk_increase_ddi_disabled_time }, - { 0x3184, 0x1849, 0x2212, quirk_increase_ddi_disabled_time }, - /* ECS Liva Q2 */ - { 0x3185, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, - { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, - /* HP Notebook - 14-r206nv */ - { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness }, -}; - -static struct intel_dpcd_quirk intel_dpcd_quirks[] = { - /* Dell Precision 5490 */ - { - .device = 0x7d55, - .subsystem_vendor = 0x1028, - .subsystem_device = 0x0cc7, - .sink_oui = SINK_OUI(0x38, 0xec, 0x11), - .hook = quirk_fw_sync_len, - }, - -}; - -void intel_init_quirks(struct intel_display *display) -{ - struct pci_dev *d = to_pci_dev(display->drm->dev); - int i; - - for (i = 0; i < ARRAY_SIZE(intel_quirks); i++) { - struct intel_quirk *q = &intel_quirks[i]; - - if (d->device == q->device && - (d->subsystem_vendor == q->subsystem_vendor || - q->subsystem_vendor == PCI_ANY_ID) && - (d->subsystem_device == q->subsystem_device || - q->subsystem_device == PCI_ANY_ID)) - q->hook(display); - } - for (i = 0; i < ARRAY_SIZE(intel_dmi_quirks); i++) { - if (dmi_check_system(*intel_dmi_quirks[i].dmi_id_list) != 0) - intel_dmi_quirks[i].hook(display); - } -} - -void intel_init_dpcd_quirks(struct intel_dp *intel_dp, - const struct drm_dp_dpcd_ident *ident) -{ - struct intel_display *display = to_intel_display(intel_dp); - struct pci_dev *d = to_pci_dev(display->drm->dev); - int i; - - for (i = 0; i < ARRAY_SIZE(intel_dpcd_quirks); i++) { - struct intel_dpcd_quirk *q = &intel_dpcd_quirks[i]; - - if (d->device == q->device && - (d->subsystem_vendor == q->subsystem_vendor || - q->subsystem_vendor == PCI_ANY_ID) && - (d->subsystem_device == q->subsystem_device || - q->subsystem_device == PCI_ANY_ID) && - !memcmp(q->sink_oui, ident->oui, sizeof(ident->oui)) && - (!memcmp(q->sink_device_id, ident->device_id, - sizeof(ident->device_id)) || - mem_is_zero(q->sink_device_id, sizeof(q->sink_device_id)))) - q->hook(intel_dp); - } -} - -bool intel_has_quirk(struct intel_display *display, enum intel_quirk_id quirk) -{ - return display->quirks.mask & BIT(quirk); -} - -bool intel_has_dpcd_quirk(struct intel_dp *intel_dp, enum intel_quirk_id quirk) -{ - return intel_dp->quirks.mask & BIT(quirk); -} diff --git a/rr-cache/5980f28fc8b8b4e17cc44eca73291563199fe0b9/preimage b/rr-cache/5980f28fc8b8b4e17cc44eca73291563199fe0b9/preimage deleted file mode 100644 index 1f0bb85affea..000000000000 --- a/rr-cache/5980f28fc8b8b4e17cc44eca73291563199fe0b9/preimage +++ /dev/null @@ -1,302 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2018 Intel Corporation - */ - -#include <linux/dmi.h> - -#include "i915_drv.h" -#include "intel_display_types.h" -#include "intel_quirks.h" - -static void intel_set_quirk(struct intel_display *display, enum intel_quirk_id quirk) -{ - display->quirks.mask |= BIT(quirk); -} - -static void intel_set_dpcd_quirk(struct intel_dp *intel_dp, enum intel_quirk_id quirk) -{ - intel_dp->quirks.mask |= BIT(quirk); -} - -/* - * Some machines (Lenovo U160) do not work with SSC on LVDS for some reason - */ -static void quirk_ssc_force_disable(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_LVDS_SSC_DISABLE); - drm_info(display->drm, "applying lvds SSC disable quirk\n"); -} - -/* - * A machine (e.g. Acer Aspire 5734Z) may need to invert the panel backlight - * brightness value - */ -static void quirk_invert_brightness(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_INVERT_BRIGHTNESS); - drm_info(display->drm, "applying inverted panel brightness quirk\n"); -} - -/* Some VBT's incorrectly indicate no backlight is present */ -static void quirk_backlight_present(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_BACKLIGHT_PRESENT); - drm_info(display->drm, "applying backlight present quirk\n"); -} - -/* Toshiba Satellite P50-C-18C requires T12 delay to be min 800ms - * which is 300 ms greater than eDP spec T12 min. - */ -static void quirk_increase_t12_delay(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_INCREASE_T12_DELAY); - drm_info(display->drm, "Applying T12 delay quirk\n"); -} - -/* - * GeminiLake NUC HDMI outputs require additional off time - * this allows the onboard retimer to correctly sync to signal - */ -static void quirk_increase_ddi_disabled_time(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_INCREASE_DDI_DISABLED_TIME); - drm_info(display->drm, "Applying Increase DDI Disabled quirk\n"); -} - -static void quirk_no_pps_backlight_power_hook(struct intel_display *display) -{ - intel_set_quirk(display, QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK); - drm_info(display->drm, "Applying no pps backlight power quirk\n"); -} - -static void quirk_fw_sync_len(struct intel_dp *intel_dp) -{ - struct intel_display *display = to_intel_display(intel_dp); - - intel_set_dpcd_quirk(intel_dp, QUIRK_FW_SYNC_LEN); - drm_info(display->drm, "Applying Fast Wake sync pulse count quirk\n"); -} - -struct intel_quirk { - int device; - int subsystem_vendor; - int subsystem_device; - void (*hook)(struct intel_display *display); -}; - -struct intel_dpcd_quirk { - int device; - int subsystem_vendor; - int subsystem_device; - u8 sink_oui[3]; - u8 sink_device_id[6]; - void (*hook)(struct intel_dp *intel_dp); -}; - -#define SINK_OUI(first, second, third) { (first), (second), (third) } -#define SINK_DEVICE_ID(first, second, third, fourth, fifth, sixth) \ - { (first), (second), (third), (fourth), (fifth), (sixth) } - -#define SINK_DEVICE_ID_ANY SINK_DEVICE_ID(0, 0, 0, 0, 0, 0) - -/* For systems that don't have a meaningful PCI subdevice/subvendor ID */ -struct intel_dmi_quirk { - void (*hook)(struct intel_display *display); - const struct dmi_system_id (*dmi_id_list)[]; -}; - -static int intel_dmi_reverse_brightness(const struct dmi_system_id *id) -{ - DRM_INFO("Backlight polarity reversed on %s\n", id->ident); - return 1; -} - -static int intel_dmi_no_pps_backlight(const struct dmi_system_id *id) -{ - DRM_INFO("No pps backlight support on %s\n", id->ident); - return 1; -} - -static const struct intel_dmi_quirk intel_dmi_quirks[] = { - { - .dmi_id_list = &(const struct dmi_system_id[]) { - { - .callback = intel_dmi_reverse_brightness, - .ident = "NCR Corporation", - .matches = {DMI_MATCH(DMI_SYS_VENDOR, "NCR Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, ""), - }, - }, - { - .callback = intel_dmi_reverse_brightness, - .ident = "Thundersoft TST178 tablet", - /* DMI strings are too generic, also match on BIOS date */ - .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Aptio CRB"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "To be filled by O.E.M."), - DMI_EXACT_MATCH(DMI_BIOS_DATE, "04/15/2014"), - }, - }, - { } /* terminating entry */ - }, - .hook = quirk_invert_brightness, - }, - { - .dmi_id_list = &(const struct dmi_system_id[]) { - { - .callback = intel_dmi_no_pps_backlight, - .ident = "Google Lillipup sku524294", - .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Google"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Lindar"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "sku524294"), - }, - }, - { - .callback = intel_dmi_no_pps_backlight, - .ident = "Google Lillipup sku524295", - .matches = {DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Google"), - DMI_EXACT_MATCH(DMI_BOARD_NAME, "Lindar"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "sku524295"), - }, - }, - { } - }, - .hook = quirk_no_pps_backlight_power_hook, - }, -}; - -static struct intel_quirk intel_quirks[] = { - /* Lenovo U160 cannot use SSC on LVDS */ - { 0x0046, 0x17aa, 0x3920, quirk_ssc_force_disable }, - - /* Sony Vaio Y cannot use SSC on LVDS */ - { 0x0046, 0x104d, 0x9076, quirk_ssc_force_disable }, - - /* Acer Aspire 5734Z must invert backlight brightness */ - { 0x2a42, 0x1025, 0x0459, quirk_invert_brightness }, - - /* Acer/eMachines G725 */ - { 0x2a42, 0x1025, 0x0210, quirk_invert_brightness }, - - /* Acer/eMachines e725 */ - { 0x2a42, 0x1025, 0x0212, quirk_invert_brightness }, - - /* Acer/Packard Bell NCL20 */ - { 0x2a42, 0x1025, 0x034b, quirk_invert_brightness }, - - /* Acer Aspire 4736Z */ - { 0x2a42, 0x1025, 0x0260, quirk_invert_brightness }, - - /* Acer Aspire 5336 */ - { 0x2a42, 0x1025, 0x048a, quirk_invert_brightness }, - - /* Acer C720 and C720P Chromebooks (Celeron 2955U) have backlights */ - { 0x0a06, 0x1025, 0x0a11, quirk_backlight_present }, - - /* Acer C720 Chromebook (Core i3 4005U) */ - { 0x0a16, 0x1025, 0x0a11, quirk_backlight_present }, - - /* Apple Macbook 2,1 (Core 2 T7400) */ - { 0x27a2, 0x8086, 0x7270, quirk_backlight_present }, - - /* Apple Macbook 4,1 */ - { 0x2a02, 0x106b, 0x00a1, quirk_backlight_present }, - - /* Toshiba CB35 Chromebook (Celeron 2955U) */ - { 0x0a06, 0x1179, 0x0a88, quirk_backlight_present }, - - /* HP Chromebook 14 (Celeron 2955U) */ - { 0x0a06, 0x103c, 0x21ed, quirk_backlight_present }, - - /* Dell Chromebook 11 */ - { 0x0a06, 0x1028, 0x0a35, quirk_backlight_present }, - - /* Dell Chromebook 11 (2015 version) */ - { 0x0a16, 0x1028, 0x0a35, quirk_backlight_present }, - - /* Toshiba Satellite P50-C-18C */ - { 0x191B, 0x1179, 0xF840, quirk_increase_t12_delay }, - - /* GeminiLake NUC */ - { 0x3185, 0x8086, 0x2072, quirk_increase_ddi_disabled_time }, - { 0x3184, 0x8086, 0x2072, quirk_increase_ddi_disabled_time }, - /* ASRock ITX*/ - { 0x3185, 0x1849, 0x2212, quirk_increase_ddi_disabled_time }, - { 0x3184, 0x1849, 0x2212, quirk_increase_ddi_disabled_time }, - /* ECS Liva Q2 */ - { 0x3185, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, - { 0x3184, 0x1019, 0xa94d, quirk_increase_ddi_disabled_time }, - /* HP Notebook - 14-r206nv */ - { 0x0f31, 0x103c, 0x220f, quirk_invert_brightness }, -}; - -static struct intel_dpcd_quirk intel_dpcd_quirks[] = { - /* Dell Precision 5490 */ - { - .device = 0x7d55, - .subsystem_vendor = 0x1028, - .subsystem_device = 0x0cc7, - .sink_oui = SINK_OUI(0x38, 0xec, 0x11), - .hook = quirk_fw_sync_len, - }, - -}; - -void intel_init_quirks(struct intel_display *display) -{ - struct pci_dev *d = to_pci_dev(display->drm->dev); - int i; - - for (i = 0; i < ARRAY_SIZE(intel_quirks); i++) { - struct intel_quirk *q = &intel_quirks[i]; - - if (d->device == q->device && - (d->subsystem_vendor == q->subsystem_vendor || - q->subsystem_vendor == PCI_ANY_ID) && - (d->subsystem_device == q->subsystem_device || - q->subsystem_device == PCI_ANY_ID)) - q->hook(display); - } - for (i = 0; i < ARRAY_SIZE(intel_dmi_quirks); i++) { - if (dmi_check_system(*intel_dmi_quirks[i].dmi_id_list) != 0) - intel_dmi_quirks[i].hook(display); - } -} - -void intel_init_dpcd_quirks(struct intel_dp *intel_dp, - const struct drm_dp_dpcd_ident *ident) -{ - struct intel_display *display = to_intel_display(intel_dp); - struct pci_dev *d = to_pci_dev(display->drm->dev); - int i; - - for (i = 0; i < ARRAY_SIZE(intel_dpcd_quirks); i++) { - struct intel_dpcd_quirk *q = &intel_dpcd_quirks[i]; - - if (d->device == q->device && - (d->subsystem_vendor == q->subsystem_vendor || - q->subsystem_vendor == PCI_ANY_ID) && - (d->subsystem_device == q->subsystem_device || - q->subsystem_device == PCI_ANY_ID) && - !memcmp(q->sink_oui, ident->oui, sizeof(ident->oui)) && - (!memcmp(q->sink_device_id, ident->device_id, - sizeof(ident->device_id)) || -<<<<<<< - !memchr_inv(q->sink_device_id, 0, sizeof(q->sink_device_id)))) -======= - mem_is_zero(q->sink_device_id, sizeof(q->sink_device_id)))) ->>>>>>> - q->hook(intel_dp); - } -} - -bool intel_has_quirk(struct intel_display *display, enum intel_quirk_id quirk) -{ - return display->quirks.mask & BIT(quirk); -} - -bool intel_has_dpcd_quirk(struct intel_dp *intel_dp, enum intel_quirk_id quirk) -{ - return intel_dp->quirks.mask & BIT(quirk); -} diff --git a/rr-cache/6e989852f5454d81ebf331bbd2c55116dc711575/preimage.5 b/rr-cache/6e989852f5454d81ebf331bbd2c55116dc711575/preimage.5 deleted file mode 100644 index 42ffb0f8cccc..000000000000 --- a/rr-cache/6e989852f5454d81ebf331bbd2c55116dc711575/preimage.5 +++ /dev/null @@ -1,982 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include "xe_exec_queue.h" - -#include <linux/nospec.h> - -#include <drm/drm_device.h> -#include <drm/drm_file.h> -#include <drm/xe_drm.h> - -#include "xe_device.h" -#include "xe_gt.h" -#include "xe_hw_engine_class_sysfs.h" -#include "xe_hw_fence.h" -#include "xe_lrc.h" -#include "xe_macros.h" -#include "xe_migrate.h" -#include "xe_pm.h" -#include "xe_ring_ops_types.h" -#include "xe_trace.h" -#include "xe_vm.h" - -enum xe_exec_queue_sched_prop { - XE_EXEC_QUEUE_JOB_TIMEOUT = 0, - XE_EXEC_QUEUE_TIMESLICE = 1, - XE_EXEC_QUEUE_PREEMPT_TIMEOUT = 2, - XE_EXEC_QUEUE_SCHED_PROP_MAX = 3, -}; - -static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number); - -static void __xe_exec_queue_free(struct xe_exec_queue *q) -{ - if (q->vm) - xe_vm_put(q->vm); - - if (q->xef) - xe_file_put(q->xef); - - kfree(q); -} - -static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, - struct xe_vm *vm, - u32 logical_mask, - u16 width, struct xe_hw_engine *hwe, - u32 flags, u64 extensions) -{ - struct xe_exec_queue *q; - struct xe_gt *gt = hwe->gt; - int err; - - /* only kernel queues can be permanent */ - XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL)); - - q = kzalloc(struct_size(q, lrc, width), GFP_KERNEL); - if (!q) - return ERR_PTR(-ENOMEM); - - kref_init(&q->refcount); - q->flags = flags; - q->hwe = hwe; - q->gt = gt; - q->class = hwe->class; - q->width = width; - q->logical_mask = logical_mask; - q->fence_irq = >->fence_irq[hwe->class]; - q->ring_ops = gt->ring_ops[hwe->class]; - q->ops = gt->exec_queue_ops; - INIT_LIST_HEAD(&q->lr.link); - INIT_LIST_HEAD(&q->multi_gt_link); - - q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; - q->sched_props.preempt_timeout_us = - hwe->eclass->sched_props.preempt_timeout_us; - q->sched_props.job_timeout_ms = - hwe->eclass->sched_props.job_timeout_ms; - if (q->flags & EXEC_QUEUE_FLAG_KERNEL && - q->flags & EXEC_QUEUE_FLAG_HIGH_PRIORITY) - q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_KERNEL; - else - q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL; - - if (vm) - q->vm = xe_vm_get(vm); - - if (extensions) { - /* - * may set q->usm, must come before xe_lrc_create(), - * may overwrite q->sched_props, must come before q->ops->init() - */ - err = exec_queue_user_extensions(xe, q, extensions, 0); - if (err) { - __xe_exec_queue_free(q); - return ERR_PTR(err); - } - } - - return q; -} - -static int __xe_exec_queue_init(struct xe_exec_queue *q) -{ - struct xe_vm *vm = q->vm; - int i, err; - - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - return err; - } - - for (i = 0; i < q->width; ++i) { - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); - if (IS_ERR(q->lrc[i])) { - err = PTR_ERR(q->lrc[i]); - goto err_unlock; - } - } - - if (vm) - xe_vm_unlock(vm); - - err = q->ops->init(q); - if (err) - goto err_lrc; - - return 0; - -err_unlock: - if (vm) - xe_vm_unlock(vm); -err_lrc: - for (i = i - 1; i >= 0; --i) - xe_lrc_put(q->lrc[i]); - return err; -} - -struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm, - u32 logical_mask, u16 width, - struct xe_hw_engine *hwe, u32 flags, - u64 extensions) -{ - struct xe_exec_queue *q; - int err; - - q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags, - extensions); - if (IS_ERR(q)) - return q; - - err = __xe_exec_queue_init(q); - if (err) - goto err_post_alloc; - - return q; - -err_post_alloc: - __xe_exec_queue_free(q); - return ERR_PTR(err); -} - -struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, - struct xe_vm *vm, - enum xe_engine_class class, - u32 flags, u64 extensions) -{ - struct xe_hw_engine *hwe, *hwe0 = NULL; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == class) { - logical_mask |= BIT(hwe->logical_instance); - if (!hwe0) - hwe0 = hwe; - } - } - - if (!logical_mask) - return ERR_PTR(-ENODEV); - - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions); -} - -/** - * xe_exec_queue_create_bind() - Create bind exec queue. - * @xe: Xe device. - * @tile: tile which bind exec queue belongs to. - * @flags: exec queue creation flags - * @extensions: exec queue creation extensions - * - * Normalize bind exec queue creation. Bind exec queue is tied to migration VM - * for access to physical memory required for page table programming. On a - * faulting devices the reserved copy engine instance must be used to avoid - * deadlocking (user binds cannot get stuck behind faults as kernel binds which - * resolve faults depend on user binds). On non-faulting devices any copy engine - * can be used. - * - * Returns exec queue on success, ERR_PTR on failure - */ -struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, - struct xe_tile *tile, - u32 flags, u64 extensions) -{ - struct xe_gt *gt = tile->primary_gt; - struct xe_exec_queue *q; - struct xe_vm *migrate_vm; - - migrate_vm = xe_migrate_get_vm(tile->migrate); - if (xe->info.has_usm) { - struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, - XE_ENGINE_CLASS_COPY, - gt->usm.reserved_bcs_instance, - false); - - if (!hwe) - return ERR_PTR(-EINVAL); - - q = xe_exec_queue_create(xe, migrate_vm, - BIT(hwe->logical_instance), 1, hwe, - flags, extensions); - } else { - q = xe_exec_queue_create_class(xe, gt, migrate_vm, - XE_ENGINE_CLASS_COPY, flags, - extensions); - } - xe_vm_put(migrate_vm); - - return q; -} - -void xe_exec_queue_destroy(struct kref *ref) -{ - struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); - struct xe_exec_queue *eq, *next; - - xe_exec_queue_last_fence_put_unlocked(q); - if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) { - list_for_each_entry_safe(eq, next, &q->multi_gt_list, - multi_gt_link) - xe_exec_queue_put(eq); - } - - q->ops->fini(q); -} - -void xe_exec_queue_fini(struct xe_exec_queue *q) -{ - int i; - - for (i = 0; i < q->width; ++i) - xe_lrc_put(q->lrc[i]); - __xe_exec_queue_free(q); -} - -void xe_exec_queue_assign_name(struct xe_exec_queue *q, u32 instance) -{ - switch (q->class) { - case XE_ENGINE_CLASS_RENDER: - snprintf(q->name, sizeof(q->name), "rcs%d", instance); - break; - case XE_ENGINE_CLASS_VIDEO_DECODE: - snprintf(q->name, sizeof(q->name), "vcs%d", instance); - break; - case XE_ENGINE_CLASS_VIDEO_ENHANCE: - snprintf(q->name, sizeof(q->name), "vecs%d", instance); - break; - case XE_ENGINE_CLASS_COPY: - snprintf(q->name, sizeof(q->name), "bcs%d", instance); - break; - case XE_ENGINE_CLASS_COMPUTE: - snprintf(q->name, sizeof(q->name), "ccs%d", instance); - break; - case XE_ENGINE_CLASS_OTHER: - snprintf(q->name, sizeof(q->name), "gsccs%d", instance); - break; - default: - XE_WARN_ON(q->class); - } -} - -struct xe_exec_queue *xe_exec_queue_lookup(struct xe_file *xef, u32 id) -{ - struct xe_exec_queue *q; - - mutex_lock(&xef->exec_queue.lock); - q = xa_load(&xef->exec_queue.xa, id); - if (q) - xe_exec_queue_get(q); - mutex_unlock(&xef->exec_queue.lock); - - return q; -} - -enum xe_exec_queue_priority -xe_exec_queue_device_get_max_priority(struct xe_device *xe) -{ - return capable(CAP_SYS_NICE) ? XE_EXEC_QUEUE_PRIORITY_HIGH : - XE_EXEC_QUEUE_PRIORITY_NORMAL; -} - -static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q, - u64 value) -{ - if (XE_IOCTL_DBG(xe, value > XE_EXEC_QUEUE_PRIORITY_HIGH)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe))) - return -EPERM; - - q->sched_props.priority = value; - return 0; -} - -static bool xe_exec_queue_enforce_schedule_limit(void) -{ -#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) - return true; -#else - return !capable(CAP_SYS_NICE); -#endif -} - -static void -xe_exec_queue_get_prop_minmax(struct xe_hw_engine_class_intf *eclass, - enum xe_exec_queue_sched_prop prop, - u32 *min, u32 *max) -{ - switch (prop) { - case XE_EXEC_QUEUE_JOB_TIMEOUT: - *min = eclass->sched_props.job_timeout_min; - *max = eclass->sched_props.job_timeout_max; - break; - case XE_EXEC_QUEUE_TIMESLICE: - *min = eclass->sched_props.timeslice_min; - *max = eclass->sched_props.timeslice_max; - break; - case XE_EXEC_QUEUE_PREEMPT_TIMEOUT: - *min = eclass->sched_props.preempt_timeout_min; - *max = eclass->sched_props.preempt_timeout_max; - break; - default: - break; - } -#if IS_ENABLED(CONFIG_DRM_XE_ENABLE_SCHEDTIMEOUT_LIMIT) - if (capable(CAP_SYS_NICE)) { - switch (prop) { - case XE_EXEC_QUEUE_JOB_TIMEOUT: - *min = XE_HW_ENGINE_JOB_TIMEOUT_MIN; - *max = XE_HW_ENGINE_JOB_TIMEOUT_MAX; - break; - case XE_EXEC_QUEUE_TIMESLICE: - *min = XE_HW_ENGINE_TIMESLICE_MIN; - *max = XE_HW_ENGINE_TIMESLICE_MAX; - break; - case XE_EXEC_QUEUE_PREEMPT_TIMEOUT: - *min = XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN; - *max = XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX; - break; - default: - break; - } - } -#endif -} - -static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *q, - u64 value) -{ - u32 min = 0, max = 0; - - xe_exec_queue_get_prop_minmax(q->hwe->eclass, - XE_EXEC_QUEUE_TIMESLICE, &min, &max); - - if (xe_exec_queue_enforce_schedule_limit() && - !xe_hw_engine_timeout_in_range(value, min, max)) - return -EINVAL; - - q->sched_props.timeslice_us = value; - return 0; -} - -typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe, - struct xe_exec_queue *q, - u64 value); - -static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = { - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority, - [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice, -}; - -static int exec_queue_user_ext_set_property(struct xe_device *xe, - struct xe_exec_queue *q, - u64 extension) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_set_property ext; - int err; - u32 idx; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, ext.property >= - ARRAY_SIZE(exec_queue_set_property_funcs)) || - XE_IOCTL_DBG(xe, ext.pad) || - XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY && - ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE)) - return -EINVAL; - - idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs)); - if (!exec_queue_set_property_funcs[idx]) - return -EINVAL; - - return exec_queue_set_property_funcs[idx](xe, q, ext.value); -} - -typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe, - struct xe_exec_queue *q, - u64 extension); - -static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = { - [DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property, -}; - -#define MAX_USER_EXTENSIONS 16 -static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q, - u64 extensions, int ext_number) -{ - u64 __user *address = u64_to_user_ptr(extensions); - struct drm_xe_user_extension ext; - int err; - u32 idx; - - if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS)) - return -E2BIG; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, ext.pad) || - XE_IOCTL_DBG(xe, ext.name >= - ARRAY_SIZE(exec_queue_user_extension_funcs))) - return -EINVAL; - - idx = array_index_nospec(ext.name, - ARRAY_SIZE(exec_queue_user_extension_funcs)); - err = exec_queue_user_extension_funcs[idx](xe, q, extensions); - if (XE_IOCTL_DBG(xe, err)) - return err; - - if (ext.next_extension) - return exec_queue_user_extensions(xe, q, ext.next_extension, - ++ext_number); - - return 0; -} - -<<<<<<< -======= -static const enum xe_engine_class user_to_xe_engine_class[] = { - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, -}; - -static struct xe_hw_engine * -find_hw_engine(struct xe_device *xe, - struct drm_xe_engine_class_instance eci) -{ - u32 idx; - - if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) - return NULL; - - if (eci.gt_id >= xe->info.gt_count) - return NULL; - - idx = array_index_nospec(eci.engine_class, - ARRAY_SIZE(user_to_xe_engine_class)); - - return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), - user_to_xe_engine_class[idx], - eci.engine_instance, true); -} - -static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - if (XE_IOCTL_DBG(xe, width != 1)) - return 0; - if (XE_IOCTL_DBG(xe, num_placements != 1)) - return 0; - if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return 0; - - eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) - logical_mask |= BIT(hwe->logical_instance); - } - - return logical_mask; -} - ->>>>>>> -static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - int len = width * num_placements; - int i, j, n; - u16 class; - u16 gt_id; - u32 return_mask = 0, prev_mask; - - if (XE_IOCTL_DBG(xe, !xe_device_uc_enabled(xe) && - len > 1)) - return 0; - - for (i = 0; i < width; ++i) { - u32 current_mask = 0; - - for (j = 0; j < num_placements; ++j) { - struct xe_hw_engine *hwe; - - n = j * width + i; - - hwe = find_hw_engine(xe, eci[n]); - if (XE_IOCTL_DBG(xe, !hwe)) - return 0; - - if (XE_IOCTL_DBG(xe, xe_hw_engine_is_reserved(hwe))) - return 0; - - if (XE_IOCTL_DBG(xe, n && eci[n].gt_id != gt_id) || - XE_IOCTL_DBG(xe, n && eci[n].engine_class != class)) - return 0; - - class = eci[n].engine_class; - gt_id = eci[n].gt_id; - - if (width == 1 || !i) - return_mask |= BIT(eci[n].engine_instance); - current_mask |= BIT(eci[n].engine_instance); - } - - /* Parallel submissions must be logically contiguous */ - if (i && XE_IOCTL_DBG(xe, current_mask != prev_mask << 1)) - return 0; - - prev_mask = current_mask; - } - - return return_mask; -} - -int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_exec_queue_create *args = data; - struct drm_xe_engine_class_instance eci[XE_HW_ENGINE_MAX_INSTANCE]; - struct drm_xe_engine_class_instance __user *user_eci = - u64_to_user_ptr(args->instances); - struct xe_hw_engine *hwe; - struct xe_vm *vm; - struct xe_gt *gt; - struct xe_tile *tile; - struct xe_exec_queue *q = NULL; - u32 logical_mask; - u32 id; - u32 len; - int err; - - if (XE_IOCTL_DBG(xe, args->flags) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - len = args->width * args->num_placements; - if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE)) - return -EINVAL; - - err = __copy_from_user(eci, user_eci, - sizeof(struct drm_xe_engine_class_instance) * - len); - if (XE_IOCTL_DBG(xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) - return -EINVAL; - - if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { - if (XE_IOCTL_DBG(xe, args->width != 1) || - XE_IOCTL_DBG(xe, args->num_placements != 1) || - XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return -EINVAL; - - for_each_tile(tile, xe, id) { - struct xe_exec_queue *new; - u32 flags = EXEC_QUEUE_FLAG_VM; - - if (id) - flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; - -<<<<<<< - eci[0].gt_id = gt->info.id; - logical_mask = bind_exec_queue_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - /* The migration vm doesn't hold rpm ref */ - xe_pm_runtime_get_noresume(xe); - - flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); - - migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); - new = xe_exec_queue_create(xe, migrate_vm, logical_mask, - args->width, hwe, flags, - args->extensions); - - xe_pm_runtime_put(xe); /* now held by engine */ - - xe_vm_put(migrate_vm); -======= - new = xe_exec_queue_create_bind(xe, tile, flags, - args->extensions); ->>>>>>> - if (IS_ERR(new)) { - err = PTR_ERR(new); - if (q) - goto put_exec_queue; - return err; - } - if (id == 0) - q = new; - else - list_add_tail(&new->multi_gt_list, - &q->multi_gt_link); - } - } else { - gt = xe_device_get_gt(xe, eci[0].gt_id); - logical_mask = calc_validate_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_DBG(xe, !vm)) - return -ENOENT; - - err = down_read_interruptible(&vm->lock); - if (err) { - xe_vm_put(vm); - return err; - } - - if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { - up_read(&vm->lock); - xe_vm_put(vm); - return -ENOENT; - } - - q = xe_exec_queue_create(xe, vm, logical_mask, - args->width, hwe, 0, - args->extensions); - up_read(&vm->lock); - xe_vm_put(vm); - if (IS_ERR(q)) - return PTR_ERR(q); - - if (xe_vm_in_preempt_fence_mode(vm)) { - q->lr.context = dma_fence_context_alloc(1); - - err = xe_vm_add_compute_exec_queue(vm, q); - if (XE_IOCTL_DBG(xe, err)) - goto put_exec_queue; - } - } - - mutex_lock(&xef->exec_queue.lock); - err = xa_alloc(&xef->exec_queue.xa, &id, q, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->exec_queue.lock); - if (err) - goto kill_exec_queue; - - args->exec_queue_id = id; - q->xef = xe_file_get(xef); - - return 0; - -kill_exec_queue: - xe_exec_queue_kill(q); -put_exec_queue: - xe_exec_queue_put(q); - return err; -} - -int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_exec_queue_get_property *args = data; - struct xe_exec_queue *q; - int ret; - - if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - q = xe_exec_queue_lookup(xef, args->exec_queue_id); - if (XE_IOCTL_DBG(xe, !q)) - return -ENOENT; - - switch (args->property) { - case DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN: - args->value = q->ops->reset_status(q); - ret = 0; - break; - default: - ret = -EINVAL; - } - - xe_exec_queue_put(q); - - return ret; -} - -/** - * xe_exec_queue_is_lr() - Whether an exec_queue is long-running - * @q: The exec_queue - * - * Return: True if the exec_queue is long-running, false otherwise. - */ -bool xe_exec_queue_is_lr(struct xe_exec_queue *q) -{ - return q->vm && xe_vm_in_lr_mode(q->vm) && - !(q->flags & EXEC_QUEUE_FLAG_VM); -} - -static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q) -{ - return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1; -} - -/** - * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full - * @q: The exec_queue - * - * Return: True if the exec_queue's ring is full, false otherwise. - */ -bool xe_exec_queue_ring_full(struct xe_exec_queue *q) -{ - struct xe_lrc *lrc = q->lrc[0]; - s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES; - - return xe_exec_queue_num_job_inflight(q) >= max_job; -} - -/** - * xe_exec_queue_is_idle() - Whether an exec_queue is idle. - * @q: The exec_queue - * - * FIXME: Need to determine what to use as the short-lived - * timeline lock for the exec_queues, so that the return value - * of this function becomes more than just an advisory - * snapshot in time. The timeline lock must protect the - * seqno from racing submissions on the same exec_queue. - * Typically vm->resv, but user-created timeline locks use the migrate vm - * and never grabs the migrate vm->resv so we have a race there. - * - * Return: True if the exec_queue is idle, false otherwise. - */ -bool xe_exec_queue_is_idle(struct xe_exec_queue *q) -{ - if (xe_exec_queue_is_parallel(q)) { - int i; - - for (i = 0; i < q->width; ++i) { - if (xe_lrc_seqno(q->lrc[i]) != - q->lrc[i]->fence_ctx.next_seqno - 1) - return false; - } - - return true; - } - - return xe_lrc_seqno(q->lrc[0]) == - q->lrc[0]->fence_ctx.next_seqno - 1; -} - -/** - * xe_exec_queue_update_run_ticks() - Update run time in ticks for this exec queue - * from hw - * @q: The exec queue - * - * Update the timestamp saved by HW for this exec queue and save run ticks - * calculated by using the delta from last update. - */ -void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) -{ - struct xe_file *xef; - struct xe_lrc *lrc; - u32 old_ts, new_ts; - - /* - * Jobs that are run during driver load may use an exec_queue, but are - * not associated with a user xe file, so avoid accumulating busyness - * for kernel specific work. - */ - if (!q->vm || !q->vm->xef) - return; - - xef = q->vm->xef; - - /* - * Only sample the first LRC. For parallel submission, all of them are - * scheduled together and we compensate that below by multiplying by - * width - this may introduce errors if that premise is not true and - * they don't exit 100% aligned. On the other hand, looping through - * the LRCs and reading them in different time could also introduce - * errors. - */ - lrc = q->lrc[0]; - new_ts = xe_lrc_update_timestamp(lrc, &old_ts); - xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; -} - -void xe_exec_queue_kill(struct xe_exec_queue *q) -{ - struct xe_exec_queue *eq = q, *next; - - list_for_each_entry_safe(eq, next, &eq->multi_gt_list, - multi_gt_link) { - q->ops->kill(eq); - xe_vm_remove_compute_exec_queue(q->vm, eq); - } - - q->ops->kill(q); - xe_vm_remove_compute_exec_queue(q->vm, q); -} - -int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_exec_queue_destroy *args = data; - struct xe_exec_queue *q; - - if (XE_IOCTL_DBG(xe, args->pad) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - mutex_lock(&xef->exec_queue.lock); - q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id); - mutex_unlock(&xef->exec_queue.lock); - if (XE_IOCTL_DBG(xe, !q)) - return -ENOENT; - - xe_exec_queue_kill(q); - - trace_xe_exec_queue_close(q); - xe_exec_queue_put(q); - - return 0; -} - -static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, - struct xe_vm *vm) -{ - if (q->flags & EXEC_QUEUE_FLAG_VM) - lockdep_assert_held(&vm->lock); - else - xe_vm_assert_held(vm); -} - -/** - * xe_exec_queue_last_fence_put() - Drop ref to last fence - * @q: The exec queue - * @vm: The VM the engine does a bind or exec for - */ -void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm) -{ - xe_exec_queue_last_fence_lockdep_assert(q, vm); - - if (q->last_fence) { - dma_fence_put(q->last_fence); - q->last_fence = NULL; - } -} - -/** - * xe_exec_queue_last_fence_put_unlocked() - Drop ref to last fence unlocked - * @q: The exec queue - * - * Only safe to be called from xe_exec_queue_destroy(). - */ -void xe_exec_queue_last_fence_put_unlocked(struct xe_exec_queue *q) -{ - if (q->last_fence) { - dma_fence_put(q->last_fence); - q->last_fence = NULL; - } -} - -/** - * xe_exec_queue_last_fence_get() - Get last fence - * @q: The exec queue - * @vm: The VM the engine does a bind or exec for - * - * Get last fence, takes a ref - * - * Returns: last fence if not signaled, dma fence stub if signaled - */ -struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, - struct xe_vm *vm) -{ - struct dma_fence *fence; - - xe_exec_queue_last_fence_lockdep_assert(q, vm); - - if (q->last_fence && - test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) - xe_exec_queue_last_fence_put(q, vm); - - fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); - dma_fence_get(fence); - return fence; -} - -/** - * xe_exec_queue_last_fence_set() - Set last fence - * @q: The exec queue - * @vm: The VM the engine does a bind or exec for - * @fence: The fence - * - * Set the last fence for the engine. Increases reference count for fence, when - * closing engine xe_exec_queue_last_fence_put should be called. - */ -void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm, - struct dma_fence *fence) -{ - xe_exec_queue_last_fence_lockdep_assert(q, vm); - - xe_exec_queue_last_fence_put(q, vm); - q->last_fence = dma_fence_get(fence); -} diff --git a/rr-cache/7972ffc51d04b1cc788e89be6c4c3744f14e3965/preimage b/rr-cache/7972ffc51d04b1cc788e89be6c4c3744f14e3965/preimage deleted file mode 100644 index ca9c1e090e6e..000000000000 --- a/rr-cache/7972ffc51d04b1cc788e89be6c4c3744f14e3965/preimage +++ /dev/null @@ -1,2017 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_exec_queue.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_sched_job.h" -#include "xe_sync.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... - */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? - */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. - * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. - */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - entry->pt_bo->update_index = -1; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. - */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. */ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. - */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. - */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. - */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. - */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. - * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. - */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (!xe_vma_is_userptr(vma) && !xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) - continue; - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - } -} - -static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i; - - for (i = 0; i < num_entries; i++) - kfree(entries[i].pt_entries); -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries, bool bind) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%s: %u entries to update\n", bind ? "bind" : "unbind", - num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) -{ - int i; - - for (i = 0; i < num_syncs; i++) { - struct dma_fence *fence = syncs[i].fence; - - if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &fence->flags)) - return false; - } - - return true; -} - -static int job_test_add_deps(struct xe_sched_job *job, - struct dma_resv *resv, - enum dma_resv_usage usage) -{ - if (!job) { - if (!dma_resv_test_signaled(resv, usage)) - return -ETIME; - - return 0; - } - - return xe_sched_job_add_deps(job, resv, usage); -} - -static int vma_add_deps(struct xe_vma *vma, struct xe_sched_job *job) -{ - struct xe_bo *bo = xe_vma_bo(vma); - - xe_bo_assert_held(bo); - - if (bo && !bo->vm) - return job_test_add_deps(job, bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL); - - return 0; -} - -static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_sched_job *job) -{ - int err = 0; - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = vma_add_deps(op->map.vma, job); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - err = vma_add_deps(op->remap.prev, job); - if (!err && op->remap.next) - err = vma_add_deps(op->remap.next, job); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_vm *vm, - struct xe_vma_ops *vops, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_range_fence_tree *rftree) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - struct xe_vma_op *op; - int err = 0, i; - - xe_vm_assert_held(vm); - - if (!job && !no_in_syncs(vops->syncs, vops->num_syncs)) - return -ETIME; - - if (!job && !xe_exec_queue_is_idle(pt_update_ops->q)) - return -ETIME; - - if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) { - err = job_test_add_deps(job, xe_vm_resv(vm), - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_BOOKKEEP : - DMA_RESV_USAGE_KERNEL); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start, - pt_update_ops->last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, - pt_update_ops->start, - pt_update_ops->last); - } - - list_for_each_entry(op, &vops->list, link) { - err = op_add_deps(vm, op, job); - if (err) - return err; - } - - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); - - for (i = 0; job && !err && i < vops->num_syncs; i++) - err = xe_sync_entry_add_deps(&vops->syncs[i], job); - - return err; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm *vm = vops->vm; - struct xe_range_fence_tree *rftree = &vm->rftree[pt_update->tile_id]; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops, - pt_update_ops, rftree); -} - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - uvma->userptr.divisor = divisor << 1; - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vm_pgtable_update_ops *pt_update) -{ - struct xe_userptr_vma *uvma; - unsigned long notifier_seq; - - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - if (!xe_vma_is_userptr(vma)) - return 0; - - uvma = to_userptr_vma(vma); - notifier_seq = uvma->userptr.notifier_seq; - - if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm)) - return 0; - - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq) && - !xe_pt_userptr_inject_eagain(uvma)) - return 0; - - if (xe_vm_in_fault_mode(vm)) { - return -EAGAIN; - } else { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - - if (xe_vm_in_preempt_fence_mode(vm)) { - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - } - } - - return 0; -} - -static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_vm_pgtable_update_ops *pt_update) -{ - int err = 0; - - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = vma_check_userptr(vm, op->map.vma, pt_update); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - err = vma_check_userptr(vm, op->remap.prev, pt_update); - if (!err && op->remap.next) - err = vma_check_userptr(vm, op->remap.next, pt_update); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), - pt_update); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vm *vm = pt_update->vops->vm; - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - struct xe_vma_op *op; - int err; - - err = xe_pt_pre_commit(pt_update); - if (err) - return err; - - down_read(&vm->userptr.notifier_lock); - - list_for_each_entry(op, &vops->list, link) { - err = op_check_userptr(vm, op, pt_update_ops); - if (err) { - up_read(&vm->userptr.notifier_lock); - break; - } - } - - return err; -} - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - -<<<<<<< - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base); -======= - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); ->>>>>>> - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. - */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. */ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. - */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vm *vm = pt_update->vops->vm; - u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - struct llist_head *deferred = &pt_update_ops->deferred; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - /* FIXME: Don't commit right away */ - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_bind(vma, pt_op->entries, pt_op->num_entries, - pt_op->rebind, deferred); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - struct llist_head *deferred = &pt_update_ops->deferred; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. - */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - /* FIXME: Don't commit right away */ - xe_pt_commit_unbind(vma, pt_op->entries, pt_op->num_entries, - deferred); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * xe_pt_update_ops_run() - Run PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Run PT update operations which includes committing internal PT state changes, - * creating job for PT update operations for leaf insertion / removal, and - * installing job fence in various places. - * - * Return: fence on success, negative ERR_PTR on error. - */ -struct dma_fence * -xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, - }; - - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - - if (!pt_update_ops->current_op) { - xe_tile_assert(tile, xe_vm_in_fault_mode(vm)); - - return dma_fence_get_stub(); - } - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN) - return ERR_PTR(-ENOSPC); -#endif - - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!ifence) - return ERR_PTR(-ENOMEM); - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - err = -ENOMEM; - goto free_ifence; - } - - fence = xe_migrate_update_pgtables(tile->migrate, &update); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto free_rfence; - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, - vm->usm.asid); - if (err) - goto put_fence; - fence = &ifence->base.base; - } - - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence); - - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); - - return fence; - -put_fence: - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); - dma_fence_put(fence); -free_rfence: - kfree(rfence); -free_ifence: - kfree(ifence); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - /* FIXME: Not 100% correct */ - for (i = 0; i < pt_update_ops->num_ops; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - if (pt_op->bind) - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - /* FIXME: Just kill VM for now + cleanup PTs */ - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); - xe_vm_kill(vops->vm, false); -} diff --git a/rr-cache/7eb162052ab66184b34a71dcf9e3e031877cb1df/postimage b/rr-cache/7eb162052ab66184b34a71dcf9e3e031877cb1df/postimage deleted file mode 100644 index 4d17fb96e77c..000000000000 --- a/rr-cache/7eb162052ab66184b34a71dcf9e3e031877cb1df/postimage +++ /dev/null @@ -1,368 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * i.MX drm driver - parallel display implementation - * - * Copyright (C) 2012 Sascha Hauer, Pengutronix - */ - -#include <linux/component.h> -#include <linux/media-bus-format.h> -#include <linux/module.h> -#include <linux/of.h> -#include <linux/platform_device.h> -#include <linux/videodev2.h> - -#include <video/of_display_timing.h> - -#include <drm/drm_atomic_helper.h> -#include <drm/drm_bridge.h> -#include <drm/drm_managed.h> -#include <drm/drm_of.h> -#include <drm/drm_panel.h> -#include <drm/drm_probe_helper.h> -#include <drm/drm_simple_kms_helper.h> - -#include "imx-drm.h" - -struct imx_parallel_display_encoder { - struct drm_connector connector; - struct drm_encoder encoder; - struct drm_bridge bridge; - struct imx_parallel_display *pd; -}; - -struct imx_parallel_display { - struct device *dev; - u32 bus_format; - u32 bus_flags; - struct drm_display_mode mode; - struct drm_panel *panel; - struct drm_bridge *next_bridge; -}; - -static inline struct imx_parallel_display *con_to_imxpd(struct drm_connector *c) -{ - return container_of(c, struct imx_parallel_display_encoder, connector)->pd; -} - -static inline struct imx_parallel_display *bridge_to_imxpd(struct drm_bridge *b) -{ - return container_of(b, struct imx_parallel_display_encoder, bridge)->pd; -} - -static int imx_pd_connector_get_modes(struct drm_connector *connector) -{ - struct imx_parallel_display *imxpd = con_to_imxpd(connector); - struct device_node *np = imxpd->dev->of_node; - int num_modes; - - num_modes = drm_panel_get_modes(imxpd->panel, connector); - if (num_modes > 0) - return num_modes; - - if (np) { - struct drm_display_mode *mode = drm_mode_create(connector->dev); - int ret; - - if (!mode) - return 0; - - ret = of_get_drm_display_mode(np, &imxpd->mode, - &imxpd->bus_flags, - OF_USE_NATIVE_MODE); - if (ret) { - drm_mode_destroy(connector->dev, mode); - return 0; - } - - drm_mode_copy(mode, &imxpd->mode); - mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; - drm_mode_probed_add(connector, mode); - num_modes++; - } - - return num_modes; -} - -static void imx_pd_bridge_enable(struct drm_bridge *bridge) -{ - struct imx_parallel_display *imxpd = bridge_to_imxpd(bridge); - - drm_panel_prepare(imxpd->panel); - drm_panel_enable(imxpd->panel); -} - -static void imx_pd_bridge_disable(struct drm_bridge *bridge) -{ - struct imx_parallel_display *imxpd = bridge_to_imxpd(bridge); - - drm_panel_disable(imxpd->panel); - drm_panel_unprepare(imxpd->panel); -} - -static const u32 imx_pd_bus_fmts[] = { - MEDIA_BUS_FMT_RGB888_1X24, - MEDIA_BUS_FMT_BGR888_1X24, - MEDIA_BUS_FMT_GBR888_1X24, - MEDIA_BUS_FMT_RGB666_1X18, - MEDIA_BUS_FMT_RGB666_1X24_CPADHI, - MEDIA_BUS_FMT_RGB565_1X16, -}; - -static u32 * -imx_pd_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state, - unsigned int *num_output_fmts) -{ - struct drm_display_info *di = &conn_state->connector->display_info; - struct imx_parallel_display *imxpd = bridge_to_imxpd(bridge); - u32 *output_fmts; - - if (!imxpd->bus_format && !di->num_bus_formats) { - *num_output_fmts = ARRAY_SIZE(imx_pd_bus_fmts); - return kmemdup(imx_pd_bus_fmts, sizeof(imx_pd_bus_fmts), - GFP_KERNEL); - } - - *num_output_fmts = 1; - output_fmts = kmalloc(sizeof(*output_fmts), GFP_KERNEL); - if (!output_fmts) - return NULL; - - if (!imxpd->bus_format && di->num_bus_formats) - output_fmts[0] = di->bus_formats[0]; - else - output_fmts[0] = imxpd->bus_format; - - return output_fmts; -} - -static bool imx_pd_format_supported(u32 output_fmt) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(imx_pd_bus_fmts); i++) { - if (imx_pd_bus_fmts[i] == output_fmt) - return true; - } - - return false; -} - -static u32 * -imx_pd_bridge_atomic_get_input_bus_fmts(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state, - u32 output_fmt, - unsigned int *num_input_fmts) -{ - struct imx_parallel_display *imxpd = bridge_to_imxpd(bridge); - u32 *input_fmts; - - /* - * If the next bridge does not support bus format negotiation, let's - * use the static bus format definition (imxpd->bus_format) if it's - * specified, RGB888 when it's not. - */ - if (output_fmt == MEDIA_BUS_FMT_FIXED) - output_fmt = imxpd->bus_format ? : MEDIA_BUS_FMT_RGB888_1X24; - - /* Now make sure the requested output format is supported. */ - if ((imxpd->bus_format && imxpd->bus_format != output_fmt) || - !imx_pd_format_supported(output_fmt)) { - *num_input_fmts = 0; - return NULL; - } - - *num_input_fmts = 1; - input_fmts = kmalloc(sizeof(*input_fmts), GFP_KERNEL); - if (!input_fmts) - return NULL; - - input_fmts[0] = output_fmt; - return input_fmts; -} - -static int imx_pd_bridge_atomic_check(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) -{ - struct imx_crtc_state *imx_crtc_state = to_imx_crtc_state(crtc_state); - struct drm_display_info *di = &conn_state->connector->display_info; - struct imx_parallel_display *imxpd = bridge_to_imxpd(bridge); - struct drm_bridge_state *next_bridge_state = NULL; - struct drm_bridge *next_bridge; - u32 bus_flags, bus_fmt; - - next_bridge = drm_bridge_get_next_bridge(bridge); - if (next_bridge) - next_bridge_state = drm_atomic_get_new_bridge_state(crtc_state->state, - next_bridge); - - if (next_bridge_state) - bus_flags = next_bridge_state->input_bus_cfg.flags; - else if (di->num_bus_formats) - bus_flags = di->bus_flags; - else - bus_flags = imxpd->bus_flags; - - bus_fmt = bridge_state->input_bus_cfg.format; - if (!imx_pd_format_supported(bus_fmt)) - return -EINVAL; - - bridge_state->output_bus_cfg.flags = bus_flags; - bridge_state->input_bus_cfg.flags = bus_flags; - imx_crtc_state->bus_flags = bus_flags; - imx_crtc_state->bus_format = bridge_state->input_bus_cfg.format; - imx_crtc_state->di_hsync_pin = 2; - imx_crtc_state->di_vsync_pin = 3; - - return 0; -} - -static const struct drm_connector_funcs imx_pd_connector_funcs = { - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = imx_drm_connector_destroy, - .reset = drm_atomic_helper_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -static const struct drm_connector_helper_funcs imx_pd_connector_helper_funcs = { - .get_modes = imx_pd_connector_get_modes, -}; - -static const struct drm_bridge_funcs imx_pd_bridge_funcs = { - .enable = imx_pd_bridge_enable, - .disable = imx_pd_bridge_disable, - .atomic_reset = drm_atomic_helper_bridge_reset, - .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, - .atomic_check = imx_pd_bridge_atomic_check, - .atomic_get_input_bus_fmts = imx_pd_bridge_atomic_get_input_bus_fmts, - .atomic_get_output_bus_fmts = imx_pd_bridge_atomic_get_output_bus_fmts, -}; - -static int imx_pd_bind(struct device *dev, struct device *master, void *data) -{ - struct drm_device *drm = data; - struct imx_parallel_display *imxpd = dev_get_drvdata(dev); - struct imx_parallel_display_encoder *imxpd_encoder; - struct drm_connector *connector; - struct drm_encoder *encoder; - struct drm_bridge *bridge; - int ret; - - imxpd_encoder = drmm_simple_encoder_alloc(drm, struct imx_parallel_display_encoder, - encoder, DRM_MODE_ENCODER_NONE); - if (IS_ERR(imxpd_encoder)) - return PTR_ERR(imxpd_encoder); - - imxpd_encoder->pd = imxpd; - connector = &imxpd_encoder->connector; - encoder = &imxpd_encoder->encoder; - bridge = &imxpd_encoder->bridge; - - ret = imx_drm_encoder_parse_of(drm, encoder, imxpd->dev->of_node); - if (ret) - return ret; - - /* set the connector's dpms to OFF so that - * drm_helper_connector_dpms() won't return - * immediately since the current state is ON - * at this point. - */ - connector->dpms = DRM_MODE_DPMS_OFF; - - bridge->funcs = &imx_pd_bridge_funcs; - drm_bridge_attach(encoder, bridge, NULL, 0); - - if (imxpd->next_bridge) { - ret = drm_bridge_attach(encoder, imxpd->next_bridge, bridge, 0); - if (ret < 0) - return ret; - } else { - drm_connector_helper_add(connector, - &imx_pd_connector_helper_funcs); - drm_connector_init(drm, connector, &imx_pd_connector_funcs, - DRM_MODE_CONNECTOR_DPI); - - drm_connector_attach_encoder(connector, encoder); - } - - return 0; -} - -static const struct component_ops imx_pd_ops = { - .bind = imx_pd_bind, -}; - -static int imx_pd_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; - struct imx_parallel_display *imxpd; - int ret; - u32 bus_format = 0; - const char *fmt; - - imxpd = devm_kzalloc(dev, sizeof(*imxpd), GFP_KERNEL); - if (!imxpd) - return -ENOMEM; - - /* port@1 is the output port */ - ret = drm_of_find_panel_or_bridge(np, 1, 0, &imxpd->panel, - &imxpd->next_bridge); - if (ret && ret != -ENODEV) - return ret; - - ret = of_property_read_string(np, "interface-pix-fmt", &fmt); - if (!ret) { - if (!strcmp(fmt, "rgb24")) - bus_format = MEDIA_BUS_FMT_RGB888_1X24; - else if (!strcmp(fmt, "rgb565")) - bus_format = MEDIA_BUS_FMT_RGB565_1X16; - else if (!strcmp(fmt, "bgr666")) - bus_format = MEDIA_BUS_FMT_RGB666_1X18; - else if (!strcmp(fmt, "lvds666")) - bus_format = MEDIA_BUS_FMT_RGB666_1X24_CPADHI; - } - imxpd->bus_format = bus_format; - - imxpd->dev = dev; - - platform_set_drvdata(pdev, imxpd); - - return component_add(dev, &imx_pd_ops); -} - -static void imx_pd_remove(struct platform_device *pdev) -{ - component_del(&pdev->dev, &imx_pd_ops); -} - -static const struct of_device_id imx_pd_dt_ids[] = { - { .compatible = "fsl,imx-parallel-display", }, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, imx_pd_dt_ids); - -static struct platform_driver imx_pd_driver = { - .probe = imx_pd_probe, - .remove_new = imx_pd_remove, - .driver = { - .of_match_table = imx_pd_dt_ids, - .name = "imx-parallel-display", - }, -}; - -module_platform_driver(imx_pd_driver); - -MODULE_DESCRIPTION("i.MX parallel display driver"); -MODULE_AUTHOR("Sascha Hauer, Pengutronix"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:imx-parallel-display"); diff --git a/rr-cache/7eb162052ab66184b34a71dcf9e3e031877cb1df/preimage b/rr-cache/7eb162052ab66184b34a71dcf9e3e031877cb1df/preimage deleted file mode 100644 index 87cde0b888bc..000000000000 --- a/rr-cache/7eb162052ab66184b34a71dcf9e3e031877cb1df/preimage +++ /dev/null @@ -1,391 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * i.MX drm driver - parallel display implementation - * - * Copyright (C) 2012 Sascha Hauer, Pengutronix - */ - -#include <linux/component.h> -#include <linux/media-bus-format.h> -#include <linux/module.h> -#include <linux/of.h> -#include <linux/platform_device.h> -#include <linux/videodev2.h> - -#include <video/of_display_timing.h> - -#include <drm/drm_atomic_helper.h> -#include <drm/drm_bridge.h> -#include <drm/drm_managed.h> -#include <drm/drm_of.h> -#include <drm/drm_panel.h> -#include <drm/drm_probe_helper.h> -#include <drm/drm_simple_kms_helper.h> - -#include "imx-drm.h" - -struct imx_parallel_display_encoder { - struct drm_connector connector; - struct drm_encoder encoder; - struct drm_bridge bridge; - struct imx_parallel_display *pd; -}; - -struct imx_parallel_display { - struct device *dev; -<<<<<<< -======= - const struct drm_edid *drm_edid; ->>>>>>> - u32 bus_format; - u32 bus_flags; - struct drm_display_mode mode; - struct drm_panel *panel; - struct drm_bridge *next_bridge; -}; - -static inline struct imx_parallel_display *con_to_imxpd(struct drm_connector *c) -{ - return container_of(c, struct imx_parallel_display_encoder, connector)->pd; -} - -static inline struct imx_parallel_display *bridge_to_imxpd(struct drm_bridge *b) -{ - return container_of(b, struct imx_parallel_display_encoder, bridge)->pd; -} - -static int imx_pd_connector_get_modes(struct drm_connector *connector) -{ - struct imx_parallel_display *imxpd = con_to_imxpd(connector); - struct device_node *np = imxpd->dev->of_node; - int num_modes; - - num_modes = drm_panel_get_modes(imxpd->panel, connector); - if (num_modes > 0) - return num_modes; - -<<<<<<< -======= - if (imxpd->drm_edid) { - drm_edid_connector_update(connector, imxpd->drm_edid); - num_modes = drm_edid_connector_add_modes(connector); - } - ->>>>>>> - if (np) { - struct drm_display_mode *mode = drm_mode_create(connector->dev); - int ret; - - if (!mode) - return 0; - - ret = of_get_drm_display_mode(np, &imxpd->mode, - &imxpd->bus_flags, - OF_USE_NATIVE_MODE); - if (ret) { - drm_mode_destroy(connector->dev, mode); - return 0; - } - - drm_mode_copy(mode, &imxpd->mode); - mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED; - drm_mode_probed_add(connector, mode); - num_modes++; - } - - return num_modes; -} - -static void imx_pd_bridge_enable(struct drm_bridge *bridge) -{ - struct imx_parallel_display *imxpd = bridge_to_imxpd(bridge); - - drm_panel_prepare(imxpd->panel); - drm_panel_enable(imxpd->panel); -} - -static void imx_pd_bridge_disable(struct drm_bridge *bridge) -{ - struct imx_parallel_display *imxpd = bridge_to_imxpd(bridge); - - drm_panel_disable(imxpd->panel); - drm_panel_unprepare(imxpd->panel); -} - -static const u32 imx_pd_bus_fmts[] = { - MEDIA_BUS_FMT_RGB888_1X24, - MEDIA_BUS_FMT_BGR888_1X24, - MEDIA_BUS_FMT_GBR888_1X24, - MEDIA_BUS_FMT_RGB666_1X18, - MEDIA_BUS_FMT_RGB666_1X24_CPADHI, - MEDIA_BUS_FMT_RGB565_1X16, -}; - -static u32 * -imx_pd_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state, - unsigned int *num_output_fmts) -{ - struct drm_display_info *di = &conn_state->connector->display_info; - struct imx_parallel_display *imxpd = bridge_to_imxpd(bridge); - u32 *output_fmts; - - if (!imxpd->bus_format && !di->num_bus_formats) { - *num_output_fmts = ARRAY_SIZE(imx_pd_bus_fmts); - return kmemdup(imx_pd_bus_fmts, sizeof(imx_pd_bus_fmts), - GFP_KERNEL); - } - - *num_output_fmts = 1; - output_fmts = kmalloc(sizeof(*output_fmts), GFP_KERNEL); - if (!output_fmts) - return NULL; - - if (!imxpd->bus_format && di->num_bus_formats) - output_fmts[0] = di->bus_formats[0]; - else - output_fmts[0] = imxpd->bus_format; - - return output_fmts; -} - -static bool imx_pd_format_supported(u32 output_fmt) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(imx_pd_bus_fmts); i++) { - if (imx_pd_bus_fmts[i] == output_fmt) - return true; - } - - return false; -} - -static u32 * -imx_pd_bridge_atomic_get_input_bus_fmts(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state, - u32 output_fmt, - unsigned int *num_input_fmts) -{ - struct imx_parallel_display *imxpd = bridge_to_imxpd(bridge); - u32 *input_fmts; - - /* - * If the next bridge does not support bus format negotiation, let's - * use the static bus format definition (imxpd->bus_format) if it's - * specified, RGB888 when it's not. - */ - if (output_fmt == MEDIA_BUS_FMT_FIXED) - output_fmt = imxpd->bus_format ? : MEDIA_BUS_FMT_RGB888_1X24; - - /* Now make sure the requested output format is supported. */ - if ((imxpd->bus_format && imxpd->bus_format != output_fmt) || - !imx_pd_format_supported(output_fmt)) { - *num_input_fmts = 0; - return NULL; - } - - *num_input_fmts = 1; - input_fmts = kmalloc(sizeof(*input_fmts), GFP_KERNEL); - if (!input_fmts) - return NULL; - - input_fmts[0] = output_fmt; - return input_fmts; -} - -static int imx_pd_bridge_atomic_check(struct drm_bridge *bridge, - struct drm_bridge_state *bridge_state, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) -{ - struct imx_crtc_state *imx_crtc_state = to_imx_crtc_state(crtc_state); - struct drm_display_info *di = &conn_state->connector->display_info; - struct imx_parallel_display *imxpd = bridge_to_imxpd(bridge); - struct drm_bridge_state *next_bridge_state = NULL; - struct drm_bridge *next_bridge; - u32 bus_flags, bus_fmt; - - next_bridge = drm_bridge_get_next_bridge(bridge); - if (next_bridge) - next_bridge_state = drm_atomic_get_new_bridge_state(crtc_state->state, - next_bridge); - - if (next_bridge_state) - bus_flags = next_bridge_state->input_bus_cfg.flags; - else if (di->num_bus_formats) - bus_flags = di->bus_flags; - else - bus_flags = imxpd->bus_flags; - - bus_fmt = bridge_state->input_bus_cfg.format; - if (!imx_pd_format_supported(bus_fmt)) - return -EINVAL; - - bridge_state->output_bus_cfg.flags = bus_flags; - bridge_state->input_bus_cfg.flags = bus_flags; - imx_crtc_state->bus_flags = bus_flags; - imx_crtc_state->bus_format = bridge_state->input_bus_cfg.format; - imx_crtc_state->di_hsync_pin = 2; - imx_crtc_state->di_vsync_pin = 3; - - return 0; -} - -static const struct drm_connector_funcs imx_pd_connector_funcs = { - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = imx_drm_connector_destroy, - .reset = drm_atomic_helper_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -static const struct drm_connector_helper_funcs imx_pd_connector_helper_funcs = { - .get_modes = imx_pd_connector_get_modes, -}; - -static const struct drm_bridge_funcs imx_pd_bridge_funcs = { - .enable = imx_pd_bridge_enable, - .disable = imx_pd_bridge_disable, - .atomic_reset = drm_atomic_helper_bridge_reset, - .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, - .atomic_check = imx_pd_bridge_atomic_check, - .atomic_get_input_bus_fmts = imx_pd_bridge_atomic_get_input_bus_fmts, - .atomic_get_output_bus_fmts = imx_pd_bridge_atomic_get_output_bus_fmts, -}; - -static int imx_pd_bind(struct device *dev, struct device *master, void *data) -{ - struct drm_device *drm = data; - struct imx_parallel_display *imxpd = dev_get_drvdata(dev); - struct imx_parallel_display_encoder *imxpd_encoder; - struct drm_connector *connector; - struct drm_encoder *encoder; - struct drm_bridge *bridge; - int ret; - - imxpd_encoder = drmm_simple_encoder_alloc(drm, struct imx_parallel_display_encoder, - encoder, DRM_MODE_ENCODER_NONE); - if (IS_ERR(imxpd_encoder)) - return PTR_ERR(imxpd_encoder); - - imxpd_encoder->pd = imxpd; - connector = &imxpd_encoder->connector; - encoder = &imxpd_encoder->encoder; - bridge = &imxpd_encoder->bridge; - - ret = imx_drm_encoder_parse_of(drm, encoder, imxpd->dev->of_node); - if (ret) - return ret; - - /* set the connector's dpms to OFF so that - * drm_helper_connector_dpms() won't return - * immediately since the current state is ON - * at this point. - */ - connector->dpms = DRM_MODE_DPMS_OFF; - - bridge->funcs = &imx_pd_bridge_funcs; - drm_bridge_attach(encoder, bridge, NULL, 0); - - if (imxpd->next_bridge) { - ret = drm_bridge_attach(encoder, imxpd->next_bridge, bridge, 0); - if (ret < 0) - return ret; - } else { - drm_connector_helper_add(connector, - &imx_pd_connector_helper_funcs); - drm_connector_init(drm, connector, &imx_pd_connector_funcs, - DRM_MODE_CONNECTOR_DPI); - - drm_connector_attach_encoder(connector, encoder); - } - - return 0; -} - -static const struct component_ops imx_pd_ops = { - .bind = imx_pd_bind, -}; - -static int imx_pd_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; - struct imx_parallel_display *imxpd; - int ret; - u32 bus_format = 0; - const char *fmt; - - imxpd = devm_kzalloc(dev, sizeof(*imxpd), GFP_KERNEL); - if (!imxpd) - return -ENOMEM; - - /* port@1 is the output port */ - ret = drm_of_find_panel_or_bridge(np, 1, 0, &imxpd->panel, - &imxpd->next_bridge); - if (ret && ret != -ENODEV) - return ret; - -<<<<<<< -======= - edidp = of_get_property(np, "edid", &edid_len); - if (edidp) - imxpd->drm_edid = drm_edid_alloc(edidp, edid_len); - ->>>>>>> - ret = of_property_read_string(np, "interface-pix-fmt", &fmt); - if (!ret) { - if (!strcmp(fmt, "rgb24")) - bus_format = MEDIA_BUS_FMT_RGB888_1X24; - else if (!strcmp(fmt, "rgb565")) - bus_format = MEDIA_BUS_FMT_RGB565_1X16; - else if (!strcmp(fmt, "bgr666")) - bus_format = MEDIA_BUS_FMT_RGB666_1X18; - else if (!strcmp(fmt, "lvds666")) - bus_format = MEDIA_BUS_FMT_RGB666_1X24_CPADHI; - } - imxpd->bus_format = bus_format; - - imxpd->dev = dev; - - platform_set_drvdata(pdev, imxpd); - - return component_add(dev, &imx_pd_ops); -} - -static void imx_pd_remove(struct platform_device *pdev) -{ - struct imx_parallel_display *imxpd = platform_get_drvdata(pdev); - - component_del(&pdev->dev, &imx_pd_ops); - - drm_edid_free(imxpd->drm_edid); -} - -static const struct of_device_id imx_pd_dt_ids[] = { - { .compatible = "fsl,imx-parallel-display", }, - { /* sentinel */ } -}; -MODULE_DEVICE_TABLE(of, imx_pd_dt_ids); - -static struct platform_driver imx_pd_driver = { - .probe = imx_pd_probe, - .remove_new = imx_pd_remove, - .driver = { - .of_match_table = imx_pd_dt_ids, - .name = "imx-parallel-display", - }, -}; - -module_platform_driver(imx_pd_driver); - -MODULE_DESCRIPTION("i.MX parallel display driver"); -MODULE_AUTHOR("Sascha Hauer, Pengutronix"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:imx-parallel-display"); diff --git a/rr-cache/97ec3e6c58a0b80be4db896ab4c8159948f85ffe/postimage b/rr-cache/97ec3e6c58a0b80be4db896ab4c8159948f85ffe/postimage deleted file mode 100644 index aa11728e7e79..000000000000 --- a/rr-cache/97ec3e6c58a0b80be4db896ab4c8159948f85ffe/postimage +++ /dev/null @@ -1,860 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ - -#include <linux/hwmon-sysfs.h> -#include <linux/hwmon.h> -#include <linux/types.h> - -#include <drm/drm_managed.h> -#include "regs/xe_gt_regs.h" -#include "regs/xe_mchbar_regs.h" -#include "regs/xe_pcode_regs.h" -#include "xe_device.h" -#include "xe_hwmon.h" -#include "xe_mmio.h" -#include "xe_pcode.h" -#include "xe_pcode_api.h" -#include "xe_sriov.h" -#include "xe_pm.h" - -enum xe_hwmon_reg { - REG_PKG_RAPL_LIMIT, - REG_PKG_POWER_SKU, - REG_PKG_POWER_SKU_UNIT, - REG_GT_PERF_STATUS, - REG_PKG_ENERGY_STATUS, -}; - -enum xe_hwmon_reg_operation { - REG_READ32, - REG_RMW32, - REG_READ64, -}; - -enum xe_hwmon_channel { - CHANNEL_CARD, - CHANNEL_PKG, - CHANNEL_MAX, -}; - -/* - * SF_* - scale factors for particular quantities according to hwmon spec. - */ -#define SF_POWER 1000000 /* microwatts */ -#define SF_CURR 1000 /* milliamperes */ -#define SF_VOLTAGE 1000 /* millivolts */ -#define SF_ENERGY 1000000 /* microjoules */ -#define SF_TIME 1000 /* milliseconds */ - -/** - * struct xe_hwmon_energy_info - to accumulate energy - */ -struct xe_hwmon_energy_info { - /** @reg_val_prev: previous energy reg val */ - u32 reg_val_prev; - /** @accum_energy: accumulated energy */ - long accum_energy; -}; - -/** - * struct xe_hwmon - xe hwmon data structure - */ -struct xe_hwmon { - /** @hwmon_dev: hwmon device for xe */ - struct device *hwmon_dev; - /** @xe: Xe device */ - struct xe_device *xe; - /** @hwmon_lock: lock for rw attributes*/ - struct mutex hwmon_lock; - /** @scl_shift_power: pkg power unit */ - int scl_shift_power; - /** @scl_shift_energy: pkg energy unit */ - int scl_shift_energy; - /** @scl_shift_time: pkg time unit */ - int scl_shift_time; - /** @ei: Energy info for energyN_input */ - struct xe_hwmon_energy_info ei[CHANNEL_MAX]; -}; - -static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, - int channel) -{ - struct xe_device *xe = hwmon->xe; - - switch (hwmon_reg) { - case REG_PKG_RAPL_LIMIT: - if (xe->info.platform == XE_BATTLEMAGE) { - if (channel == CHANNEL_PKG) - return BMG_PACKAGE_RAPL_LIMIT; - else - return BMG_PLATFORM_POWER_LIMIT; - } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { - return PVC_GT0_PACKAGE_RAPL_LIMIT; - } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) { - return PCU_CR_PACKAGE_RAPL_LIMIT; - } - break; - case REG_PKG_POWER_SKU: - if (xe->info.platform == XE_BATTLEMAGE) - return BMG_PACKAGE_POWER_SKU; - else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) - return PVC_GT0_PACKAGE_POWER_SKU; - else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) - return PCU_CR_PACKAGE_POWER_SKU; - break; - case REG_PKG_POWER_SKU_UNIT: - if (xe->info.platform == XE_BATTLEMAGE) - return BMG_PACKAGE_POWER_SKU_UNIT; - else if (xe->info.platform == XE_PVC) - return PVC_GT0_PACKAGE_POWER_SKU_UNIT; - else if (xe->info.platform == XE_DG2) - return PCU_CR_PACKAGE_POWER_SKU_UNIT; - break; - case REG_GT_PERF_STATUS: - if (xe->info.platform == XE_DG2 && channel == CHANNEL_PKG) - return GT_PERF_STATUS; - break; - case REG_PKG_ENERGY_STATUS: - if (xe->info.platform == XE_BATTLEMAGE) { - if (channel == CHANNEL_PKG) - return BMG_PACKAGE_ENERGY_STATUS; - else - return BMG_PLATFORM_ENERGY_STATUS; - } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { - return PVC_GT0_PLATFORM_ENERGY_STATUS; - } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) { - return PCU_CR_PACKAGE_ENERGY_STATUS; - } - break; - default: - drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); - break; - } - - return XE_REG(0); -} - -#define PL1_DISABLE 0 - -/* - * HW allows arbitrary PL1 limits to be set but silently clamps these values to - * "typical but not guaranteed" min/max values in REG_PKG_POWER_SKU. Follow the - * same pattern for sysfs, allow arbitrary PL1 limits to be set but display - * clamped values when read. - */ -static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value) -{ - u64 reg_val, min, max; - struct xe_device *xe = hwmon->xe; - struct xe_reg rapl_limit, pkg_power_sku; - struct xe_gt *mmio = xe_root_mmio_gt(xe); - - rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); - pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); - - /* - * Valid check of REG_PKG_RAPL_LIMIT is already done in xe_hwmon_power_is_visible. - * So not checking it again here. - */ - if (!xe_reg_is_valid(pkg_power_sku)) { - drm_warn(&xe->drm, "pkg_power_sku invalid\n"); - *value = 0; - return; - } - - mutex_lock(&hwmon->hwmon_lock); - - reg_val = xe_mmio_read32(mmio, rapl_limit); - /* Check if PL1 limit is disabled */ - if (!(reg_val & PKG_PWR_LIM_1_EN)) { - *value = PL1_DISABLE; - goto unlock; - } - - reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); - *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); - - reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); - min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); - min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); - max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); - max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); - - if (min && max) - *value = clamp_t(u64, *value, min, max); -unlock: - mutex_unlock(&hwmon->hwmon_lock); -} - -static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value) -{ - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); - int ret = 0; - u64 reg_val; - struct xe_reg rapl_limit; - - rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); - - mutex_lock(&hwmon->hwmon_lock); - - /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ - if (value == PL1_DISABLE) { - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN, 0); - reg_val = xe_mmio_read32(mmio, rapl_limit); - if (reg_val & PKG_PWR_LIM_1_EN) { - drm_warn(&hwmon->xe->drm, "PL1 disable is not supported!\n"); - ret = -EOPNOTSUPP; - } - goto unlock; - } - - /* Computation in 64-bits to avoid overflow. Round to nearest. */ - reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); - reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); - -unlock: - mutex_unlock(&hwmon->hwmon_lock); - return ret; -} - -static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value) -{ - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); - struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); - u64 reg_val; - - /* - * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check - * for this register can be skipped. - * See xe_hwmon_power_is_visible. - */ - reg_val = xe_mmio_read32(mmio, reg); - reg_val = REG_FIELD_GET(PKG_TDP, reg_val); - *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); -} - -/* - * xe_hwmon_energy_get - Obtain energy value - * - * The underlying energy hardware register is 32-bits and is subject to - * overflow. How long before overflow? For example, with an example - * scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and - * a power draw of 1000 watts, the 32-bit counter will overflow in - * approximately 4.36 minutes. - * - * Examples: - * 1 watt: (2^32 >> 14) / 1 W / (60 * 60 * 24) secs/day -> 3 days - * 1000 watts: (2^32 >> 14) / 1000 W / 60 secs/min -> 4.36 minutes - * - * The function significantly increases overflow duration (from 4.36 - * minutes) by accumulating the energy register into a 'long' as allowed by - * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()), - * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and - * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before - * energyN_input overflows. This at 1000 W is an overflow duration of 278 years. - */ -static void -xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy) -{ - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); - struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; - u64 reg_val; - - reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, - channel)); - - if (reg_val >= ei->reg_val_prev) - ei->accum_energy += reg_val - ei->reg_val_prev; - else - ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; - - ei->reg_val_prev = reg_val; - - *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, - hwmon->scl_shift_energy); -} - -static ssize_t -xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct xe_hwmon *hwmon = dev_get_drvdata(dev); - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); - u32 x, y, x_w = 2; /* 2 bits */ - u64 r, tau4, out; - int sensor_index = to_sensor_dev_attr(attr)->index; - - xe_pm_runtime_get(hwmon->xe); - - mutex_lock(&hwmon->hwmon_lock); - - r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index)); - - mutex_unlock(&hwmon->hwmon_lock); - - xe_pm_runtime_put(hwmon->xe); - - x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); - y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); - - /* - * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17) - * = (4 | x) << (y - 2) - * - * Here (y - 2) ensures a 1.x fixed point representation of 1.x - * As x is 2 bits so 1.x can be 1.0, 1.25, 1.50, 1.75 - * - * As y can be < 2, we compute tau4 = (4 | x) << y - * and then add 2 when doing the final right shift to account for units - */ - tau4 = (u64)((1 << x_w) | x) << y; - - /* val in hwmon interface units (millisec) */ - out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); - - return sysfs_emit(buf, "%llu\n", out); -} - -static ssize_t -xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct xe_hwmon *hwmon = dev_get_drvdata(dev); - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); - u32 x, y, rxy, x_w = 2; /* 2 bits */ - u64 tau4, r, max_win; - unsigned long val; - int ret; - int sensor_index = to_sensor_dev_attr(attr)->index; - - ret = kstrtoul(buf, 0, &val); - if (ret) - return ret; - - /* - * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12. - * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds. - * - * The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register. - * However, it is observed that existing discrete GPUs does not provide correct - * PKG_MAX_WIN value, therefore a using default constant value. For future discrete GPUs - * this may get resolved, in which case PKG_MAX_WIN should be obtained from PKG_PWR_SKU. - */ -#define PKG_MAX_WIN_DEFAULT 0x12ull - - /* - * val must be < max in hwmon interface units. The steps below are - * explained in xe_hwmon_power_max_interval_show() - */ - r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); - x = REG_FIELD_GET(PKG_MAX_WIN_X, r); - y = REG_FIELD_GET(PKG_MAX_WIN_Y, r); - tau4 = (u64)((1 << x_w) | x) << y; - max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); - - if (val > max_win) - return -EINVAL; - - /* val in hw units */ - val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME); - - /* - * Convert val to 1.x * power(2,y) - * y = ilog2(val) - * x = (val - (1 << y)) >> (y - 2) - */ - if (!val) { - y = 0; - x = 0; - } else { - y = ilog2(val); - x = (val - (1ul << y)) << x_w >> y; - } - - rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); - - xe_pm_runtime_get(hwmon->xe); - - mutex_lock(&hwmon->hwmon_lock); - - r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index), - PKG_PWR_LIM_1_TIME, rxy); - - mutex_unlock(&hwmon->hwmon_lock); - - xe_pm_runtime_put(hwmon->xe); - - return count; -} - -static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, - xe_hwmon_power_max_interval_show, - xe_hwmon_power_max_interval_store, CHANNEL_CARD); - -static SENSOR_DEVICE_ATTR(power2_max_interval, 0664, - xe_hwmon_power_max_interval_show, - xe_hwmon_power_max_interval_store, CHANNEL_PKG); - -static struct attribute *hwmon_attributes[] = { - &sensor_dev_attr_power1_max_interval.dev_attr.attr, - &sensor_dev_attr_power2_max_interval.dev_attr.attr, - NULL -}; - -static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, - struct attribute *attr, int index) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_hwmon *hwmon = dev_get_drvdata(dev); - int ret = 0; - - xe_pm_runtime_get(hwmon->xe); - - ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0; - - xe_pm_runtime_put(hwmon->xe); - - return ret; -} - -static const struct attribute_group hwmon_attrgroup = { - .attrs = hwmon_attributes, - .is_visible = xe_hwmon_attributes_visible, -}; - -static const struct attribute_group *hwmon_groups[] = { - &hwmon_attrgroup, - NULL -}; - -static const struct hwmon_channel_info * const hwmon_info[] = { - HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL, - HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL), - HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), - HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), - HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), - NULL -}; - -/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */ -static int xe_hwmon_pcode_read_i1(const struct xe_hwmon *hwmon, u32 *uval) -{ - struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); - - /* Avoid Illegal Subcommand error */ - if (hwmon->xe->info.platform == XE_DG2) - return -ENXIO; - - return xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, - POWER_SETUP_SUBCOMMAND_READ_I1, 0), - uval, NULL); -} - -static int xe_hwmon_pcode_write_i1(const struct xe_hwmon *hwmon, u32 uval) -{ - struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); - - return xe_pcode_write(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, - POWER_SETUP_SUBCOMMAND_WRITE_I1, 0), - (uval & POWER_SETUP_I1_DATA_MASK)); -} - -static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, - long *value, u32 scale_factor) -{ - int ret; - u32 uval; - - mutex_lock(&hwmon->hwmon_lock); - - ret = xe_hwmon_pcode_read_i1(hwmon, &uval); - if (ret) - goto unlock; - - *value = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval), - scale_factor, POWER_SETUP_I1_SHIFT); -unlock: - mutex_unlock(&hwmon->hwmon_lock); - return ret; -} - -static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, - long value, u32 scale_factor) -{ - int ret; - u32 uval; - - mutex_lock(&hwmon->hwmon_lock); - - uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); - ret = xe_hwmon_pcode_write_i1(hwmon, uval); - - mutex_unlock(&hwmon->hwmon_lock); - return ret; -} - -static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value) -{ - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); - u64 reg_val; - - reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel)); - /* HW register value in units of 2.5 millivolt */ - *value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE); -} - -static umode_t -xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) -{ - u32 uval; - - switch (attr) { - case hwmon_power_max: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, - channel)) ? 0664 : 0; - case hwmon_power_rated_max: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, - channel)) ? 0444 : 0; - case hwmon_power_crit: - if (channel == CHANNEL_PKG) - return (xe_hwmon_pcode_read_i1(hwmon, &uval) || - !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; - break; - case hwmon_power_label: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, - channel)) ? 0444 : 0; - default: - return 0; - } - return 0; -} - -static int -xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) -{ - switch (attr) { - case hwmon_power_max: - xe_hwmon_power_max_read(hwmon, channel, val); - return 0; - case hwmon_power_rated_max: - xe_hwmon_power_rated_max_read(hwmon, channel, val); - return 0; - case hwmon_power_crit: - return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_POWER); - default: - return -EOPNOTSUPP; - } -} - -static int -xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) -{ - switch (attr) { - case hwmon_power_max: - return xe_hwmon_power_max_write(hwmon, channel, val); - case hwmon_power_crit: - return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_POWER); - default: - return -EOPNOTSUPP; - } -} - -static umode_t -xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) -{ - u32 uval; - - /* hwmon sysfs attribute of current available only for package */ - if (channel != CHANNEL_PKG) - return 0; - - switch (attr) { - case hwmon_curr_crit: - return (xe_hwmon_pcode_read_i1(hwmon, &uval) || - (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; - case hwmon_curr_label: - return (xe_hwmon_pcode_read_i1(hwmon, &uval) || - (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0444; - break; - default: - return 0; - } - return 0; -} - -static int -xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) -{ - switch (attr) { - case hwmon_curr_crit: - return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_CURR); - default: - return -EOPNOTSUPP; - } -} - -static int -xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) -{ - switch (attr) { - case hwmon_curr_crit: - return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_CURR); - default: - return -EOPNOTSUPP; - } -} - -static umode_t -xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) -{ - switch (attr) { - case hwmon_in_input: - case hwmon_in_label: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, - channel)) ? 0444 : 0; - default: - return 0; - } -} - -static int -xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) -{ - switch (attr) { - case hwmon_in_input: - xe_hwmon_get_voltage(hwmon, channel, val); - return 0; - default: - return -EOPNOTSUPP; - } -} - -static umode_t -xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) -{ - switch (attr) { - case hwmon_energy_input: - case hwmon_energy_label: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, - channel)) ? 0444 : 0; - default: - return 0; - } -} - -static int -xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) -{ - switch (attr) { - case hwmon_energy_input: - xe_hwmon_energy_get(hwmon, channel, val); - return 0; - default: - return -EOPNOTSUPP; - } -} - -static umode_t -xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, - u32 attr, int channel) -{ - struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata; - int ret; - - xe_pm_runtime_get(hwmon->xe); - - switch (type) { - case hwmon_power: - ret = xe_hwmon_power_is_visible(hwmon, attr, channel); - break; - case hwmon_curr: - ret = xe_hwmon_curr_is_visible(hwmon, attr, channel); - break; - case hwmon_in: - ret = xe_hwmon_in_is_visible(hwmon, attr, channel); - break; - case hwmon_energy: - ret = xe_hwmon_energy_is_visible(hwmon, attr, channel); - break; - default: - ret = 0; - break; - } - - xe_pm_runtime_put(hwmon->xe); - - return ret; -} - -static int -xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, - int channel, long *val) -{ - struct xe_hwmon *hwmon = dev_get_drvdata(dev); - int ret; - - xe_pm_runtime_get(hwmon->xe); - - switch (type) { - case hwmon_power: - ret = xe_hwmon_power_read(hwmon, attr, channel, val); - break; - case hwmon_curr: - ret = xe_hwmon_curr_read(hwmon, attr, channel, val); - break; - case hwmon_in: - ret = xe_hwmon_in_read(hwmon, attr, channel, val); - break; - case hwmon_energy: - ret = xe_hwmon_energy_read(hwmon, attr, channel, val); - break; - default: - ret = -EOPNOTSUPP; - break; - } - - xe_pm_runtime_put(hwmon->xe); - - return ret; -} - -static int -xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, - int channel, long val) -{ - struct xe_hwmon *hwmon = dev_get_drvdata(dev); - int ret; - - xe_pm_runtime_get(hwmon->xe); - - switch (type) { - case hwmon_power: - ret = xe_hwmon_power_write(hwmon, attr, channel, val); - break; - case hwmon_curr: - ret = xe_hwmon_curr_write(hwmon, attr, channel, val); - break; - default: - ret = -EOPNOTSUPP; - break; - } - - xe_pm_runtime_put(hwmon->xe); - - return ret; -} - -static int xe_hwmon_read_label(struct device *dev, - enum hwmon_sensor_types type, - u32 attr, int channel, const char **str) -{ - switch (type) { - case hwmon_power: - case hwmon_energy: - case hwmon_curr: - case hwmon_in: - if (channel == CHANNEL_CARD) - *str = "card"; - else if (channel == CHANNEL_PKG) - *str = "pkg"; - return 0; - default: - return -EOPNOTSUPP; - } -} - -static const struct hwmon_ops hwmon_ops = { - .is_visible = xe_hwmon_is_visible, - .read = xe_hwmon_read, - .write = xe_hwmon_write, - .read_string = xe_hwmon_read_label, -}; - -static const struct hwmon_chip_info hwmon_chip_info = { - .ops = &hwmon_ops, - .info = hwmon_info, -}; - -static void -xe_hwmon_get_preregistration_info(struct xe_device *xe) -{ - struct xe_gt *mmio = xe_root_mmio_gt(xe); - struct xe_hwmon *hwmon = xe->hwmon; - long energy; - u64 val_sku_unit = 0; - int channel; - struct xe_reg pkg_power_sku_unit; - - /* - * The contents of register PKG_POWER_SKU_UNIT do not change, - * so read it once and store the shift values. - */ - pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0); - if (xe_reg_is_valid(pkg_power_sku_unit)) { - val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit); - hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); - hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); - hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); - } - - /* - * Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the - * first value of the energy register read - */ - for (channel = 0; channel < CHANNEL_MAX; channel++) - if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel)) - xe_hwmon_energy_get(hwmon, channel, &energy); -} - -static void xe_hwmon_mutex_destroy(void *arg) -{ - struct xe_hwmon *hwmon = arg; - - mutex_destroy(&hwmon->hwmon_lock); -} - -void xe_hwmon_register(struct xe_device *xe) -{ - struct device *dev = xe->drm.dev; - struct xe_hwmon *hwmon; - - /* hwmon is available only for dGfx */ - if (!IS_DGFX(xe)) - return; - - /* hwmon is not available on VFs */ - if (IS_SRIOV_VF(xe)) - return; - - hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); - if (!hwmon) - return; - - xe->hwmon = hwmon; - - mutex_init(&hwmon->hwmon_lock); - if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon)) - return; - - /* There's only one instance of hwmon per device */ - hwmon->xe = xe; - - xe_hwmon_get_preregistration_info(xe); - - drm_dbg(&xe->drm, "Register xe hwmon interface\n"); - - /* hwmon_dev points to device hwmon<i> */ - hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon, - &hwmon_chip_info, - hwmon_groups); - - if (IS_ERR(hwmon->hwmon_dev)) { - drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev); - xe->hwmon = NULL; - return; - } -} - diff --git a/rr-cache/97ec3e6c58a0b80be4db896ab4c8159948f85ffe/preimage b/rr-cache/97ec3e6c58a0b80be4db896ab4c8159948f85ffe/preimage deleted file mode 100644 index fffca61207b7..000000000000 --- a/rr-cache/97ec3e6c58a0b80be4db896ab4c8159948f85ffe/preimage +++ /dev/null @@ -1,868 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ - -#include <linux/hwmon-sysfs.h> -#include <linux/hwmon.h> -#include <linux/types.h> - -#include <drm/drm_managed.h> -#include "regs/xe_gt_regs.h" -#include "regs/xe_mchbar_regs.h" -#include "regs/xe_pcode_regs.h" -#include "xe_device.h" -#include "xe_hwmon.h" -#include "xe_mmio.h" -#include "xe_pcode.h" -#include "xe_pcode_api.h" -#include "xe_sriov.h" -#include "xe_pm.h" - -enum xe_hwmon_reg { - REG_PKG_RAPL_LIMIT, - REG_PKG_POWER_SKU, - REG_PKG_POWER_SKU_UNIT, - REG_GT_PERF_STATUS, - REG_PKG_ENERGY_STATUS, -}; - -enum xe_hwmon_reg_operation { - REG_READ32, - REG_RMW32, - REG_READ64, -}; - -enum xe_hwmon_channel { - CHANNEL_CARD, - CHANNEL_PKG, - CHANNEL_MAX, -}; - -/* - * SF_* - scale factors for particular quantities according to hwmon spec. - */ -#define SF_POWER 1000000 /* microwatts */ -#define SF_CURR 1000 /* milliamperes */ -#define SF_VOLTAGE 1000 /* millivolts */ -#define SF_ENERGY 1000000 /* microjoules */ -#define SF_TIME 1000 /* milliseconds */ - -/** - * struct xe_hwmon_energy_info - to accumulate energy - */ -struct xe_hwmon_energy_info { - /** @reg_val_prev: previous energy reg val */ - u32 reg_val_prev; - /** @accum_energy: accumulated energy */ - long accum_energy; -}; - -/** - * struct xe_hwmon - xe hwmon data structure - */ -struct xe_hwmon { - /** @hwmon_dev: hwmon device for xe */ - struct device *hwmon_dev; - /** @xe: Xe device */ - struct xe_device *xe; - /** @hwmon_lock: lock for rw attributes*/ - struct mutex hwmon_lock; - /** @scl_shift_power: pkg power unit */ - int scl_shift_power; - /** @scl_shift_energy: pkg energy unit */ - int scl_shift_energy; - /** @scl_shift_time: pkg time unit */ - int scl_shift_time; - /** @ei: Energy info for energyN_input */ - struct xe_hwmon_energy_info ei[CHANNEL_MAX]; -}; - -static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg, - int channel) -{ - struct xe_device *xe = hwmon->xe; - - switch (hwmon_reg) { - case REG_PKG_RAPL_LIMIT: - if (xe->info.platform == XE_BATTLEMAGE) { - if (channel == CHANNEL_PKG) - return BMG_PACKAGE_RAPL_LIMIT; - else - return BMG_PLATFORM_POWER_LIMIT; - } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { - return PVC_GT0_PACKAGE_RAPL_LIMIT; - } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) { - return PCU_CR_PACKAGE_RAPL_LIMIT; - } - break; - case REG_PKG_POWER_SKU: - if (xe->info.platform == XE_BATTLEMAGE) - return BMG_PACKAGE_POWER_SKU; - else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) - return PVC_GT0_PACKAGE_POWER_SKU; - else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) - return PCU_CR_PACKAGE_POWER_SKU; - break; - case REG_PKG_POWER_SKU_UNIT: - if (xe->info.platform == XE_BATTLEMAGE) - return BMG_PACKAGE_POWER_SKU_UNIT; - else if (xe->info.platform == XE_PVC) - return PVC_GT0_PACKAGE_POWER_SKU_UNIT; - else if (xe->info.platform == XE_DG2) - return PCU_CR_PACKAGE_POWER_SKU_UNIT; - break; - case REG_GT_PERF_STATUS: - if (xe->info.platform == XE_DG2 && channel == CHANNEL_PKG) - return GT_PERF_STATUS; - break; - case REG_PKG_ENERGY_STATUS: - if (xe->info.platform == XE_BATTLEMAGE) { - if (channel == CHANNEL_PKG) - return BMG_PACKAGE_ENERGY_STATUS; - else - return BMG_PLATFORM_ENERGY_STATUS; - } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) { - return PVC_GT0_PLATFORM_ENERGY_STATUS; - } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) { - return PCU_CR_PACKAGE_ENERGY_STATUS; - } - break; - default: - drm_warn(&xe->drm, "Unknown xe hwmon reg id: %d\n", hwmon_reg); - break; - } - - return XE_REG(0); -} - -#define PL1_DISABLE 0 - -/* - * HW allows arbitrary PL1 limits to be set but silently clamps these values to - * "typical but not guaranteed" min/max values in REG_PKG_POWER_SKU. Follow the - * same pattern for sysfs, allow arbitrary PL1 limits to be set but display - * clamped values when read. - */ -static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value) -{ - u64 reg_val, min, max; - struct xe_device *xe = hwmon->xe; - struct xe_reg rapl_limit, pkg_power_sku; - struct xe_gt *mmio = xe_root_mmio_gt(xe); - - rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); - pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); - - /* - * Valid check of REG_PKG_RAPL_LIMIT is already done in xe_hwmon_power_is_visible. - * So not checking it again here. - */ - if (!xe_reg_is_valid(pkg_power_sku)) { - drm_warn(&xe->drm, "pkg_power_sku invalid\n"); - *value = 0; - return; - } - - mutex_lock(&hwmon->hwmon_lock); - - reg_val = xe_mmio_read32(mmio, rapl_limit); - /* Check if PL1 limit is disabled */ - if (!(reg_val & PKG_PWR_LIM_1_EN)) { - *value = PL1_DISABLE; - goto unlock; - } - - reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val); - *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); - - reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku); - min = REG_FIELD_GET(PKG_MIN_PWR, reg_val); - min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power); - max = REG_FIELD_GET(PKG_MAX_PWR, reg_val); - max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power); - - if (min && max) - *value = clamp_t(u64, *value, min, max); -unlock: - mutex_unlock(&hwmon->hwmon_lock); -} - -static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value) -{ - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); - int ret = 0; - u64 reg_val; - struct xe_reg rapl_limit; - - rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel); - - mutex_lock(&hwmon->hwmon_lock); - - /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */ - if (value == PL1_DISABLE) { - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN, 0); - reg_val = xe_mmio_read32(mmio, rapl_limit); - if (reg_val & PKG_PWR_LIM_1_EN) { - drm_warn(&hwmon->xe->drm, "PL1 disable is not supported!\n"); - ret = -EOPNOTSUPP; - } - goto unlock; - } - - /* Computation in 64-bits to avoid overflow. Round to nearest. */ - reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER); - reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val); - reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val); - -unlock: - mutex_unlock(&hwmon->hwmon_lock); - return ret; -} - -static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value) -{ - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); - struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel); - u64 reg_val; - - /* - * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check - * for this register can be skipped. - * See xe_hwmon_power_is_visible. - */ - reg_val = xe_mmio_read32(mmio, reg); - reg_val = REG_FIELD_GET(PKG_TDP, reg_val); - *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power); -} - -/* - * xe_hwmon_energy_get - Obtain energy value - * - * The underlying energy hardware register is 32-bits and is subject to - * overflow. How long before overflow? For example, with an example - * scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and - * a power draw of 1000 watts, the 32-bit counter will overflow in - * approximately 4.36 minutes. - * - * Examples: - * 1 watt: (2^32 >> 14) / 1 W / (60 * 60 * 24) secs/day -> 3 days - * 1000 watts: (2^32 >> 14) / 1000 W / 60 secs/min -> 4.36 minutes - * - * The function significantly increases overflow duration (from 4.36 - * minutes) by accumulating the energy register into a 'long' as allowed by - * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()), - * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and - * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before - * energyN_input overflows. This at 1000 W is an overflow duration of 278 years. - */ -static void -xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy) -{ - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); - struct xe_hwmon_energy_info *ei = &hwmon->ei[channel]; - u64 reg_val; - - reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, - channel)); - - if (reg_val >= ei->reg_val_prev) - ei->accum_energy += reg_val - ei->reg_val_prev; - else - ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val; - - ei->reg_val_prev = reg_val; - - *energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY, - hwmon->scl_shift_energy); -} - -static ssize_t -xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct xe_hwmon *hwmon = dev_get_drvdata(dev); - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); - u32 x, y, x_w = 2; /* 2 bits */ - u64 r, tau4, out; - int sensor_index = to_sensor_dev_attr(attr)->index; - - xe_pm_runtime_get(hwmon->xe); - - mutex_lock(&hwmon->hwmon_lock); - - r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index)); - - mutex_unlock(&hwmon->hwmon_lock); - - xe_pm_runtime_put(hwmon->xe); - - x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r); - y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r); - - /* - * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17) - * = (4 | x) << (y - 2) - * - * Here (y - 2) ensures a 1.x fixed point representation of 1.x - * As x is 2 bits so 1.x can be 1.0, 1.25, 1.50, 1.75 - * - * As y can be < 2, we compute tau4 = (4 | x) << y - * and then add 2 when doing the final right shift to account for units - */ - tau4 = (u64)((1 << x_w) | x) << y; - - /* val in hwmon interface units (millisec) */ - out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); - - return sysfs_emit(buf, "%llu\n", out); -} - -static ssize_t -xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct xe_hwmon *hwmon = dev_get_drvdata(dev); - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); - u32 x, y, rxy, x_w = 2; /* 2 bits */ - u64 tau4, r, max_win; - unsigned long val; - int ret; - int sensor_index = to_sensor_dev_attr(attr)->index; - - ret = kstrtoul(buf, 0, &val); - if (ret) - return ret; - - /* - * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12. - * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds. - * - * The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register. - * However, it is observed that existing discrete GPUs does not provide correct - * PKG_MAX_WIN value, therefore a using default constant value. For future discrete GPUs - * this may get resolved, in which case PKG_MAX_WIN should be obtained from PKG_PWR_SKU. - */ -#define PKG_MAX_WIN_DEFAULT 0x12ull - - /* - * val must be < max in hwmon interface units. The steps below are - * explained in xe_hwmon_power_max_interval_show() - */ - r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT); - x = REG_FIELD_GET(PKG_MAX_WIN_X, r); - y = REG_FIELD_GET(PKG_MAX_WIN_Y, r); - tau4 = (u64)((1 << x_w) | x) << y; - max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w); - - if (val > max_win) - return -EINVAL; - - /* val in hw units */ - val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME); - - /* - * Convert val to 1.x * power(2,y) - * y = ilog2(val) - * x = (val - (1 << y)) >> (y - 2) - */ - if (!val) { - y = 0; - x = 0; - } else { - y = ilog2(val); - x = (val - (1ul << y)) << x_w >> y; - } - - rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y); - - xe_pm_runtime_get(hwmon->xe); - - mutex_lock(&hwmon->hwmon_lock); - - r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index), - PKG_PWR_LIM_1_TIME, rxy); - - mutex_unlock(&hwmon->hwmon_lock); - - xe_pm_runtime_put(hwmon->xe); - - return count; -} - -static SENSOR_DEVICE_ATTR(power1_max_interval, 0664, - xe_hwmon_power_max_interval_show, - xe_hwmon_power_max_interval_store, CHANNEL_CARD); - -static SENSOR_DEVICE_ATTR(power2_max_interval, 0664, - xe_hwmon_power_max_interval_show, - xe_hwmon_power_max_interval_store, CHANNEL_PKG); - -static struct attribute *hwmon_attributes[] = { - &sensor_dev_attr_power1_max_interval.dev_attr.attr, - &sensor_dev_attr_power2_max_interval.dev_attr.attr, - NULL -}; - -static umode_t xe_hwmon_attributes_visible(struct kobject *kobj, - struct attribute *attr, int index) -{ - struct device *dev = kobj_to_dev(kobj); - struct xe_hwmon *hwmon = dev_get_drvdata(dev); - int ret = 0; - - xe_pm_runtime_get(hwmon->xe); - - ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0; - - xe_pm_runtime_put(hwmon->xe); - - return ret; -} - -static const struct attribute_group hwmon_attrgroup = { - .attrs = hwmon_attributes, - .is_visible = xe_hwmon_attributes_visible, -}; - -static const struct attribute_group *hwmon_groups[] = { - &hwmon_attrgroup, - NULL -}; - -static const struct hwmon_channel_info * const hwmon_info[] = { - HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL, - HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL), - HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL), - HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL), - HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL), - NULL -}; - -/* I1 is exposed as power_crit or as curr_crit depending on bit 31 */ -static int xe_hwmon_pcode_read_i1(const struct xe_hwmon *hwmon, u32 *uval) -{ - struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); - - /* Avoid Illegal Subcommand error */ - if (hwmon->xe->info.platform == XE_DG2) - return -ENXIO; - -<<<<<<< - return xe_pcode_read(gt_to_tile(gt), PCODE_MBOX(PCODE_POWER_SETUP, -======= - return xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, ->>>>>>> - POWER_SETUP_SUBCOMMAND_READ_I1, 0), - uval, NULL); -} - -static int xe_hwmon_pcode_write_i1(const struct xe_hwmon *hwmon, u32 uval) -{ -<<<<<<< - return xe_pcode_write(gt_to_tile(gt), PCODE_MBOX(PCODE_POWER_SETUP, -======= - struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe); - - return xe_pcode_write(root_tile, PCODE_MBOX(PCODE_POWER_SETUP, ->>>>>>> - POWER_SETUP_SUBCOMMAND_WRITE_I1, 0), - (uval & POWER_SETUP_I1_DATA_MASK)); -} - -static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel, - long *value, u32 scale_factor) -{ - int ret; - u32 uval; - - mutex_lock(&hwmon->hwmon_lock); - - ret = xe_hwmon_pcode_read_i1(hwmon, &uval); - if (ret) - goto unlock; - - *value = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, uval), - scale_factor, POWER_SETUP_I1_SHIFT); -unlock: - mutex_unlock(&hwmon->hwmon_lock); - return ret; -} - -static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel, - long value, u32 scale_factor) -{ - int ret; - u32 uval; - - mutex_lock(&hwmon->hwmon_lock); - - uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor); - ret = xe_hwmon_pcode_write_i1(hwmon, uval); - - mutex_unlock(&hwmon->hwmon_lock); - return ret; -} - -static void xe_hwmon_get_voltage(struct xe_hwmon *hwmon, int channel, long *value) -{ - struct xe_gt *mmio = xe_root_mmio_gt(hwmon->xe); - u64 reg_val; - - reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, channel)); - /* HW register value in units of 2.5 millivolt */ - *value = DIV_ROUND_CLOSEST(REG_FIELD_GET(VOLTAGE_MASK, reg_val) * 2500, SF_VOLTAGE); -} - -static umode_t -xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) -{ - u32 uval; - - switch (attr) { - case hwmon_power_max: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, - channel)) ? 0664 : 0; - case hwmon_power_rated_max: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, - channel)) ? 0444 : 0; - case hwmon_power_crit: - if (channel == CHANNEL_PKG) - return (xe_hwmon_pcode_read_i1(hwmon, &uval) || - !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; - break; - case hwmon_power_label: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, - channel)) ? 0444 : 0; - default: - return 0; - } - return 0; -} - -static int -xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) -{ - switch (attr) { - case hwmon_power_max: - xe_hwmon_power_max_read(hwmon, channel, val); - return 0; - case hwmon_power_rated_max: - xe_hwmon_power_rated_max_read(hwmon, channel, val); - return 0; - case hwmon_power_crit: - return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_POWER); - default: - return -EOPNOTSUPP; - } -} - -static int -xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) -{ - switch (attr) { - case hwmon_power_max: - return xe_hwmon_power_max_write(hwmon, channel, val); - case hwmon_power_crit: - return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_POWER); - default: - return -EOPNOTSUPP; - } -} - -static umode_t -xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel) -{ - u32 uval; - - /* hwmon sysfs attribute of current available only for package */ - if (channel != CHANNEL_PKG) - return 0; - - switch (attr) { - case hwmon_curr_crit: - return (xe_hwmon_pcode_read_i1(hwmon, &uval) || - (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644; - case hwmon_curr_label: - return (xe_hwmon_pcode_read_i1(hwmon, &uval) || - (uval & POWER_SETUP_I1_WATTS)) ? 0 : 0444; - break; - default: - return 0; - } - return 0; -} - -static int -xe_hwmon_curr_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) -{ - switch (attr) { - case hwmon_curr_crit: - return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_CURR); - default: - return -EOPNOTSUPP; - } -} - -static int -xe_hwmon_curr_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val) -{ - switch (attr) { - case hwmon_curr_crit: - return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_CURR); - default: - return -EOPNOTSUPP; - } -} - -static umode_t -xe_hwmon_in_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) -{ - switch (attr) { - case hwmon_in_input: - case hwmon_in_label: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_GT_PERF_STATUS, - channel)) ? 0444 : 0; - default: - return 0; - } -} - -static int -xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) -{ - switch (attr) { - case hwmon_in_input: - xe_hwmon_get_voltage(hwmon, channel, val); - return 0; - default: - return -EOPNOTSUPP; - } -} - -static umode_t -xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel) -{ - switch (attr) { - case hwmon_energy_input: - case hwmon_energy_label: - return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS, - channel)) ? 0444 : 0; - default: - return 0; - } -} - -static int -xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val) -{ - switch (attr) { - case hwmon_energy_input: - xe_hwmon_energy_get(hwmon, channel, val); - return 0; - default: - return -EOPNOTSUPP; - } -} - -static umode_t -xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type, - u32 attr, int channel) -{ - struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata; - int ret; - - xe_pm_runtime_get(hwmon->xe); - - switch (type) { - case hwmon_power: - ret = xe_hwmon_power_is_visible(hwmon, attr, channel); - break; - case hwmon_curr: - ret = xe_hwmon_curr_is_visible(hwmon, attr, channel); - break; - case hwmon_in: - ret = xe_hwmon_in_is_visible(hwmon, attr, channel); - break; - case hwmon_energy: - ret = xe_hwmon_energy_is_visible(hwmon, attr, channel); - break; - default: - ret = 0; - break; - } - - xe_pm_runtime_put(hwmon->xe); - - return ret; -} - -static int -xe_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, - int channel, long *val) -{ - struct xe_hwmon *hwmon = dev_get_drvdata(dev); - int ret; - - xe_pm_runtime_get(hwmon->xe); - - switch (type) { - case hwmon_power: - ret = xe_hwmon_power_read(hwmon, attr, channel, val); - break; - case hwmon_curr: - ret = xe_hwmon_curr_read(hwmon, attr, channel, val); - break; - case hwmon_in: - ret = xe_hwmon_in_read(hwmon, attr, channel, val); - break; - case hwmon_energy: - ret = xe_hwmon_energy_read(hwmon, attr, channel, val); - break; - default: - ret = -EOPNOTSUPP; - break; - } - - xe_pm_runtime_put(hwmon->xe); - - return ret; -} - -static int -xe_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, - int channel, long val) -{ - struct xe_hwmon *hwmon = dev_get_drvdata(dev); - int ret; - - xe_pm_runtime_get(hwmon->xe); - - switch (type) { - case hwmon_power: - ret = xe_hwmon_power_write(hwmon, attr, channel, val); - break; - case hwmon_curr: - ret = xe_hwmon_curr_write(hwmon, attr, channel, val); - break; - default: - ret = -EOPNOTSUPP; - break; - } - - xe_pm_runtime_put(hwmon->xe); - - return ret; -} - -static int xe_hwmon_read_label(struct device *dev, - enum hwmon_sensor_types type, - u32 attr, int channel, const char **str) -{ - switch (type) { - case hwmon_power: - case hwmon_energy: - case hwmon_curr: - case hwmon_in: - if (channel == CHANNEL_CARD) - *str = "card"; - else if (channel == CHANNEL_PKG) - *str = "pkg"; - return 0; - default: - return -EOPNOTSUPP; - } -} - -static const struct hwmon_ops hwmon_ops = { - .is_visible = xe_hwmon_is_visible, - .read = xe_hwmon_read, - .write = xe_hwmon_write, - .read_string = xe_hwmon_read_label, -}; - -static const struct hwmon_chip_info hwmon_chip_info = { - .ops = &hwmon_ops, - .info = hwmon_info, -}; - -static void -xe_hwmon_get_preregistration_info(struct xe_device *xe) -{ - struct xe_gt *mmio = xe_root_mmio_gt(xe); - struct xe_hwmon *hwmon = xe->hwmon; - long energy; - u64 val_sku_unit = 0; - int channel; - struct xe_reg pkg_power_sku_unit; - - /* - * The contents of register PKG_POWER_SKU_UNIT do not change, - * so read it once and store the shift values. - */ - pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0); - if (xe_reg_is_valid(pkg_power_sku_unit)) { - val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit); - hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit); - hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit); - hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit); - } - - /* - * Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the - * first value of the energy register read - */ - for (channel = 0; channel < CHANNEL_MAX; channel++) - if (xe_hwmon_is_visible(hwmon, hwmon_energy, hwmon_energy_input, channel)) - xe_hwmon_energy_get(hwmon, channel, &energy); -} - -static void xe_hwmon_mutex_destroy(void *arg) -{ - struct xe_hwmon *hwmon = arg; - - mutex_destroy(&hwmon->hwmon_lock); -} - -void xe_hwmon_register(struct xe_device *xe) -{ - struct device *dev = xe->drm.dev; - struct xe_hwmon *hwmon; - - /* hwmon is available only for dGfx */ - if (!IS_DGFX(xe)) - return; - - /* hwmon is not available on VFs */ - if (IS_SRIOV_VF(xe)) - return; - - hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL); - if (!hwmon) - return; - - xe->hwmon = hwmon; - - mutex_init(&hwmon->hwmon_lock); - if (devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon)) - return; - - /* There's only one instance of hwmon per device */ - hwmon->xe = xe; - - xe_hwmon_get_preregistration_info(xe); - - drm_dbg(&xe->drm, "Register xe hwmon interface\n"); - - /* hwmon_dev points to device hwmon<i> */ - hwmon->hwmon_dev = devm_hwmon_device_register_with_info(dev, "xe", hwmon, - &hwmon_chip_info, - hwmon_groups); - - if (IS_ERR(hwmon->hwmon_dev)) { - drm_warn(&xe->drm, "Failed to register xe hwmon (%pe)\n", hwmon->hwmon_dev); - xe->hwmon = NULL; - return; - } -} - diff --git a/rr-cache/9b13d9d717b2c7afda9a1a29c7ee25f3085193b3/postimage b/rr-cache/9b13d9d717b2c7afda9a1a29c7ee25f3085193b3/postimage deleted file mode 100644 index c4877090f574..000000000000 --- a/rr-cache/9b13d9d717b2c7afda9a1a29c7ee25f3085193b3/postimage +++ /dev/null @@ -1,3351 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include "xe_vm.h" - -#include <linux/dma-fence-array.h> -#include <linux/nospec.h> - -#include <drm/drm_exec.h> -#include <drm/drm_print.h> -#include <drm/ttm/ttm_execbuf_util.h> -#include <drm/ttm/ttm_tt.h> -#include <drm/xe_drm.h> -#include <linux/ascii85.h> -#include <linux/delay.h> -#include <linux/kthread.h> -#include <linux/mm.h> -#include <linux/swap.h> - -#include <generated/xe_wa_oob.h> - -#include "regs/xe_gtt_defs.h" -#include "xe_assert.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_exec_queue.h" -#include "xe_gt_pagefault.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pat.h" -#include "xe_pm.h" -#include "xe_preempt_fence.h" -#include "xe_pt.h" -#include "xe_res_cursor.h" -#include "xe_sync.h" -#include "xe_trace_bo.h" -#include "xe_wa.h" -#include "xe_hmm.h" - -static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) -{ - return vm->gpuvm.r_obj; -} - -/** - * xe_vma_userptr_check_repin() - Advisory check for repin needed - * @uvma: The userptr vma - * - * Check if the userptr vma has been invalidated since last successful - * repin. The check is advisory only and can the function can be called - * without the vm->userptr.notifier_lock held. There is no guarantee that the - * vma userptr will remain valid after a lockless check, so typically - * the call needs to be followed by a proper check under the notifier_lock. - * - * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. - */ -int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) -{ - return mmu_interval_check_retry(&uvma->userptr.notifier, - uvma->userptr.notifier_seq) ? - -EAGAIN : 0; -} - -int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) -{ - struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - - lockdep_assert_held(&vm->lock); - xe_assert(xe, xe_vma_is_userptr(vma)); - - return xe_hmm_userptr_populate_range(uvma, false); -} - -static bool preempt_fences_waiting(struct xe_vm *vm) -{ - struct xe_exec_queue *q; - - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { - if (!q->lr.pfence || - test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &q->lr.pfence->flags)) { - return true; - } - } - - return false; -} - -static void free_preempt_fences(struct list_head *list) -{ - struct list_head *link, *next; - - list_for_each_safe(link, next, list) - xe_preempt_fence_free(to_preempt_fence_from_link(link)); -} - -static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, - unsigned int *count) -{ - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - - if (*count >= vm->preempt.num_exec_queues) - return 0; - - for (; *count < vm->preempt.num_exec_queues; ++(*count)) { - struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); - - if (IS_ERR(pfence)) - return PTR_ERR(pfence); - - list_move_tail(xe_preempt_fence_link(pfence), list); - } - - return 0; -} - -static int wait_for_existing_preempt_fences(struct xe_vm *vm) -{ - struct xe_exec_queue *q; - - xe_vm_assert_held(vm); - - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { - if (q->lr.pfence) { - long timeout = dma_fence_wait(q->lr.pfence, false); - - if (timeout < 0) - return -ETIME; - dma_fence_put(q->lr.pfence); - q->lr.pfence = NULL; - } - } - - return 0; -} - -static bool xe_vm_is_idle(struct xe_vm *vm) -{ - struct xe_exec_queue *q; - - xe_vm_assert_held(vm); - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { - if (!xe_exec_queue_is_idle(q)) - return false; - } - - return true; -} - -static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) -{ - struct list_head *link; - struct xe_exec_queue *q; - - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { - struct dma_fence *fence; - - link = list->next; - xe_assert(vm->xe, link != list); - - fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), - q, q->lr.context, - ++q->lr.seqno); - dma_fence_put(q->lr.pfence); - q->lr.pfence = fence; - } -} - -static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) -{ - struct xe_exec_queue *q; - int err; - - xe_bo_assert_held(bo); - - if (!vm->preempt.num_exec_queues) - return 0; - - err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); - if (err) - return err; - - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) - if (q->lr.pfence) { - dma_resv_add_fence(bo->ttm.base.resv, - q->lr.pfence, - DMA_RESV_USAGE_BOOKKEEP); - } - - return 0; -} - -static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, - struct drm_exec *exec) -{ - struct xe_exec_queue *q; - - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { - q->ops->resume(q); - - drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, - DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); - } -} - -int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) -{ - struct drm_gpuvm_exec vm_exec = { - .vm = &vm->gpuvm, - .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, - .num_fences = 1, - }; - struct drm_exec *exec = &vm_exec.exec; - struct dma_fence *pfence; - int err; - bool wait; - - xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); - - down_write(&vm->lock); - err = drm_gpuvm_exec_lock(&vm_exec); - if (err) - goto out_up_write; - - pfence = xe_preempt_fence_create(q, q->lr.context, - ++q->lr.seqno); - if (!pfence) { - err = -ENOMEM; - goto out_fini; - } - - list_add(&q->lr.link, &vm->preempt.exec_queues); - ++vm->preempt.num_exec_queues; - q->lr.pfence = pfence; - - down_read(&vm->userptr.notifier_lock); - - drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, - DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); - - /* - * Check to see if a preemption on VM is in flight or userptr - * invalidation, if so trigger this preempt fence to sync state with - * other preempt fences on the VM. - */ - wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); - if (wait) - dma_fence_enable_sw_signaling(pfence); - - up_read(&vm->userptr.notifier_lock); - -out_fini: - drm_exec_fini(exec); -out_up_write: - up_write(&vm->lock); - - return err; -} - -/** - * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM - * @vm: The VM. - * @q: The exec_queue - */ -void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) -{ - if (!xe_vm_in_preempt_fence_mode(vm)) - return; - - down_write(&vm->lock); - list_del(&q->lr.link); - --vm->preempt.num_exec_queues; - if (q->lr.pfence) { - dma_fence_enable_sw_signaling(q->lr.pfence); - dma_fence_put(q->lr.pfence); - q->lr.pfence = NULL; - } - up_write(&vm->lock); -} - -/** - * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs - * that need repinning. - * @vm: The VM. - * - * This function checks for whether the VM has userptrs that need repinning, - * and provides a release-type barrier on the userptr.notifier_lock after - * checking. - * - * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. - */ -int __xe_vm_userptr_needs_repin(struct xe_vm *vm) -{ - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - return (list_empty(&vm->userptr.repin_list) && - list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; -} - -#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 - -void xe_vm_kill(struct xe_vm *vm, bool unlocked) -{ - struct xe_exec_queue *q; - - lockdep_assert_held(&vm->lock); - - if (unlocked) - xe_vm_lock(vm, false); - - vm->flags |= XE_VM_FLAG_BANNED; - trace_xe_vm_kill(vm); - - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) - q->ops->kill(q); - - if (unlocked) - xe_vm_unlock(vm); - - /* TODO: Inform user the VM is banned */ -} - -/** - * xe_vm_validate_should_retry() - Whether to retry after a validate error. - * @exec: The drm_exec object used for locking before validation. - * @err: The error returned from ttm_bo_validate(). - * @end: A ktime_t cookie that should be set to 0 before first use and - * that should be reused on subsequent calls. - * - * With multiple active VMs, under memory pressure, it is possible that - * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM. - * Until ttm properly handles locking in such scenarios, best thing the - * driver can do is retry with a timeout. Check if that is necessary, and - * if so unlock the drm_exec's objects while keeping the ticket to prepare - * for a rerun. - * - * Return: true if a retry after drm_exec_init() is recommended; - * false otherwise. - */ -bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end) -{ - ktime_t cur; - - if (err != -ENOMEM) - return false; - - cur = ktime_get(); - *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS); - if (!ktime_before(cur, *end)) - return false; - - msleep(20); - return true; -} - -static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) -{ - struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); - struct drm_gpuva *gpuva; - int ret; - - lockdep_assert_held(&vm->lock); - drm_gpuvm_bo_for_each_va(gpuva, vm_bo) - list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, - &vm->rebind_list); - - ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false); - if (ret) - return ret; - - vm_bo->evicted = false; - return 0; -} - -/** - * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas - * @vm: The vm for which we are rebinding. - * @exec: The struct drm_exec with the locked GEM objects. - * @num_fences: The number of fences to reserve for the operation, not - * including rebinds and validations. - * - * Validates all evicted gem objects and rebinds their vmas. Note that - * rebindings may cause evictions and hence the validation-rebind - * sequence is rerun until there are no more objects to validate. - * - * Return: 0 on success, negative error code on error. In particular, - * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if - * the drm_exec transaction needs to be restarted. - */ -int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, - unsigned int num_fences) -{ - struct drm_gem_object *obj; - unsigned long index; - int ret; - - do { - ret = drm_gpuvm_validate(&vm->gpuvm, exec); - if (ret) - return ret; - - ret = xe_vm_rebind(vm, false); - if (ret) - return ret; - } while (!list_empty(&vm->gpuvm.evict.list)); - - drm_exec_for_each_locked_object(exec, index, obj) { - ret = dma_resv_reserve_fences(obj->resv, num_fences); - if (ret) - return ret; - } - - return 0; -} - -static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, - bool *done) -{ - int err; - - err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); - if (err) - return err; - - if (xe_vm_is_idle(vm)) { - vm->preempt.rebind_deactivated = true; - *done = true; - return 0; - } - - if (!preempt_fences_waiting(vm)) { - *done = true; - return 0; - } - - err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); - if (err) - return err; - - err = wait_for_existing_preempt_fences(vm); - if (err) - return err; - - /* - * Add validation and rebinding to the locking loop since both can - * cause evictions which may require blocing dma_resv locks. - * The fence reservation here is intended for the new preempt fences - * we attach at the end of the rebind work. - */ - return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); -} - -static void preempt_rebind_work_func(struct work_struct *w) -{ - struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); - struct drm_exec exec; - unsigned int fence_count = 0; - LIST_HEAD(preempt_fences); - ktime_t end = 0; - int err = 0; - long wait; - int __maybe_unused tries = 0; - - xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); - trace_xe_vm_rebind_worker_enter(vm); - - down_write(&vm->lock); - - if (xe_vm_is_closed_or_banned(vm)) { - up_write(&vm->lock); - trace_xe_vm_rebind_worker_exit(vm); - return; - } - -retry: - if (xe_vm_userptr_check_repin(vm)) { - err = xe_vm_userptr_pin(vm); - if (err) - goto out_unlock_outer; - } - - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); - - drm_exec_until_all_locked(&exec) { - bool done = false; - - err = xe_preempt_work_begin(&exec, vm, &done); - drm_exec_retry_on_contention(&exec); - if (err || done) { - drm_exec_fini(&exec); - if (err && xe_vm_validate_should_retry(&exec, err, &end)) - err = -EAGAIN; - - goto out_unlock_outer; - } - } - - err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); - if (err) - goto out_unlock; - - err = xe_vm_rebind(vm, true); - if (err) - goto out_unlock; - - /* Wait on rebinds and munmap style VM unbinds */ - wait = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_KERNEL, - false, MAX_SCHEDULE_TIMEOUT); - if (wait <= 0) { - err = -ETIME; - goto out_unlock; - } - -#define retry_required(__tries, __vm) \ - (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ - (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ - __xe_vm_userptr_needs_repin(__vm)) - - down_read(&vm->userptr.notifier_lock); - if (retry_required(tries, vm)) { - up_read(&vm->userptr.notifier_lock); - err = -EAGAIN; - goto out_unlock; - } - -#undef retry_required - - spin_lock(&vm->xe->ttm.lru_lock); - ttm_lru_bulk_move_tail(&vm->lru_bulk_move); - spin_unlock(&vm->xe->ttm.lru_lock); - - /* Point of no return. */ - arm_preempt_fences(vm, &preempt_fences); - resume_and_reinstall_preempt_fences(vm, &exec); - up_read(&vm->userptr.notifier_lock); - -out_unlock: - drm_exec_fini(&exec); -out_unlock_outer: - if (err == -EAGAIN) { - trace_xe_vm_rebind_worker_retry(vm); - goto retry; - } - - if (err) { - drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); - xe_vm_kill(vm, true); - } - up_write(&vm->lock); - - free_preempt_fences(&preempt_fences); - - trace_xe_vm_rebind_worker_exit(vm); -} - -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) -{ - struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier); - struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr); - struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - xe_assert(vm->xe, xe_vma_is_userptr(vma)); - trace_xe_vma_userptr_invalidate(vma); - - if (!mmu_notifier_range_blockable(range)) - return false; - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "NOTIFIER: addr=0x%016llx, range=0x%016llx", - xe_vma_start(vma), xe_vma_size(vma)); - - down_write(&vm->userptr.notifier_lock); - mmu_interval_set_seq(mni, cur_seq); - - /* No need to stop gpu access if the userptr is not yet bound. */ - if (!userptr->initial_bind) { - up_write(&vm->userptr.notifier_lock); - return true; - } - - /* - * Tell exec and rebind worker they need to repin and rebind this - * userptr. - */ - if (!xe_vm_in_fault_mode(vm) && - !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&userptr->invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - } - - up_write(&vm->userptr.notifier_lock); - - /* - * Preempt fences turn into schedule disables, pipeline these. - * Note that even in fault mode, we need to wait for binds and - * unbinds to complete, and those are attached as BOOKMARK fences - * to the vm. - */ - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - - if (xe_vm_in_fault_mode(vm)) { - err = xe_vm_invalidate_vma(vma); - XE_WARN_ON(err); - } - - trace_xe_vma_userptr_invalidate_complete(vma); - - return true; -} - -static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { - .invalidate = vma_userptr_invalidate, -}; - -int xe_vm_userptr_pin(struct xe_vm *vm) -{ - struct xe_userptr_vma *uvma, *next; - int err = 0; - LIST_HEAD(tmp_evict); - - xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); - lockdep_assert_held_write(&vm->lock); - - /* Collect invalidated userptrs */ - spin_lock(&vm->userptr.invalidated_lock); - list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, - userptr.invalidate_link) { - list_del_init(&uvma->userptr.invalidate_link); - list_move_tail(&uvma->userptr.repin_link, - &vm->userptr.repin_list); - } - spin_unlock(&vm->userptr.invalidated_lock); - - /* Pin and move to temporary list */ - list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, - userptr.repin_link) { - err = xe_vma_userptr_pin_pages(uvma); - if (err == -EFAULT) { - list_del_init(&uvma->userptr.repin_link); - - /* Wait for pending binds */ - xe_vm_lock(vm, false); - dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - - err = xe_vm_invalidate_vma(&uvma->vma); - xe_vm_unlock(vm); - if (err) - return err; - } else { - if (err < 0) - return err; - - list_del_init(&uvma->userptr.repin_link); - list_move_tail(&uvma->vma.combined_links.rebind, - &vm->rebind_list); - } - } - - return 0; -} - -/** - * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs - * that need repinning. - * @vm: The VM. - * - * This function does an advisory check for whether the VM has userptrs that - * need repinning. - * - * Return: 0 if there are no indications of userptrs needing repinning, - * -EAGAIN if there are. - */ -int xe_vm_userptr_check_repin(struct xe_vm *vm) -{ - return (list_empty_careful(&vm->userptr.repin_list) && - list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; -} - -static int xe_vma_ops_alloc(struct xe_vma_ops *vops) -{ - int i; - - for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { - if (!vops->pt_update_ops[i].num_ops) - continue; - - vops->pt_update_ops[i].ops = - kmalloc_array(vops->pt_update_ops[i].num_ops, - sizeof(*vops->pt_update_ops[i].ops), - GFP_KERNEL); - if (!vops->pt_update_ops[i].ops) - return -ENOMEM; - } - - return 0; -} - -static void xe_vma_ops_fini(struct xe_vma_ops *vops) -{ - int i; - - for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) - kfree(vops->pt_update_ops[i].ops); -} - -static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) -{ - int i; - - for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) - if (BIT(i) & tile_mask) - ++vops->pt_update_ops[i].num_ops; -} - -static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, - u8 tile_mask) -{ - INIT_LIST_HEAD(&op->link); - op->tile_mask = tile_mask; - op->base.op = DRM_GPUVA_OP_MAP; - op->base.map.va.addr = vma->gpuva.va.addr; - op->base.map.va.range = vma->gpuva.va.range; - op->base.map.gem.obj = vma->gpuva.gem.obj; - op->base.map.gem.offset = vma->gpuva.gem.offset; - op->map.vma = vma; - op->map.immediate = true; - op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; - op->map.is_null = xe_vma_is_null(vma); -} - -static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, - u8 tile_mask) -{ - struct xe_vma_op *op; - - op = kzalloc(sizeof(*op), GFP_KERNEL); - if (!op) - return -ENOMEM; - - xe_vm_populate_rebind(op, vma, tile_mask); - list_add_tail(&op->link, &vops->list); - xe_vma_ops_incr_pt_update_ops(vops, tile_mask); - - return 0; -} - -static struct dma_fence *ops_execute(struct xe_vm *vm, - struct xe_vma_ops *vops); -static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, - struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs); - -int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) -{ - struct dma_fence *fence; - struct xe_vma *vma, *next; - struct xe_vma_ops vops; - struct xe_vma_op *op, *next_op; - int err, i; - - lockdep_assert_held(&vm->lock); - if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || - list_empty(&vm->rebind_list)) - return 0; - - xe_vma_ops_init(&vops, vm, NULL, NULL, 0); - for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) - vops.pt_update_ops[i].wait_vm_bookkeep = true; - - xe_vm_assert_held(vm); - list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { - xe_assert(vm->xe, vma->tile_present); - - if (rebind_worker) - trace_xe_vma_rebind_worker(vma); - else - trace_xe_vma_rebind_exec(vma); - - err = xe_vm_ops_add_rebind(&vops, vma, - vma->tile_present); - if (err) - goto free_ops; - } - - err = xe_vma_ops_alloc(&vops); - if (err) - goto free_ops; - - fence = ops_execute(vm, &vops); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - } else { - dma_fence_put(fence); - list_for_each_entry_safe(vma, next, &vm->rebind_list, - combined_links.rebind) - list_del_init(&vma->combined_links.rebind); - } -free_ops: - list_for_each_entry_safe(op, next_op, &vops.list, link) { - list_del(&op->link); - kfree(op); - } - xe_vma_ops_fini(&vops); - - return err; -} - -struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) -{ - struct dma_fence *fence = NULL; - struct xe_vma_ops vops; - struct xe_vma_op *op, *next_op; - struct xe_tile *tile; - u8 id; - int err; - - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); - - xe_vma_ops_init(&vops, vm, NULL, NULL, 0); - for_each_tile(tile, vm->xe, id) { - vops.pt_update_ops[id].wait_vm_bookkeep = true; - vops.pt_update_ops[tile->id].q = - xe_tile_migrate_exec_queue(tile); - } - - err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); - if (err) - return ERR_PTR(err); - - err = xe_vma_ops_alloc(&vops); - if (err) { - fence = ERR_PTR(err); - goto free_ops; - } - - fence = ops_execute(vm, &vops); - -free_ops: - list_for_each_entry_safe(op, next_op, &vops.list, link) { - list_del(&op->link); - kfree(op); - } - xe_vma_ops_fini(&vops); - - return fence; -} - -static void xe_vma_free(struct xe_vma *vma) -{ - if (xe_vma_is_userptr(vma)) - kfree(to_userptr_vma(vma)); - else - kfree(vma); -} - -#define VMA_CREATE_FLAG_READ_ONLY BIT(0) -#define VMA_CREATE_FLAG_IS_NULL BIT(1) -#define VMA_CREATE_FLAG_DUMPABLE BIT(2) - -static struct xe_vma *xe_vma_create(struct xe_vm *vm, - struct xe_bo *bo, - u64 bo_offset_or_userptr, - u64 start, u64 end, - u16 pat_index, unsigned int flags) -{ - struct xe_vma *vma; - struct xe_tile *tile; - u8 id; - bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); - bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); - bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); - - xe_assert(vm->xe, start < end); - xe_assert(vm->xe, end < vm->size); - - /* - * Allocate and ensure that the xe_vma_is_userptr() return - * matches what was allocated. - */ - if (!bo && !is_null) { - struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); - - if (!uvma) - return ERR_PTR(-ENOMEM); - - vma = &uvma->vma; - } else { - vma = kzalloc(sizeof(*vma), GFP_KERNEL); - if (!vma) - return ERR_PTR(-ENOMEM); - - if (is_null) - vma->gpuva.flags |= DRM_GPUVA_SPARSE; - if (bo) - vma->gpuva.gem.obj = &bo->ttm.base; - } - - INIT_LIST_HEAD(&vma->combined_links.rebind); - - INIT_LIST_HEAD(&vma->gpuva.gem.entry); - vma->gpuva.vm = &vm->gpuvm; - vma->gpuva.va.addr = start; - vma->gpuva.va.range = end - start + 1; - if (read_only) - vma->gpuva.flags |= XE_VMA_READ_ONLY; - if (dumpable) - vma->gpuva.flags |= XE_VMA_DUMPABLE; - - for_each_tile(tile, vm->xe, id) - vma->tile_mask |= 0x1 << id; - - if (vm->xe->info.has_atomic_enable_pte_bit) - vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; - - vma->pat_index = pat_index; - - if (bo) { - struct drm_gpuvm_bo *vm_bo; - - xe_bo_assert_held(bo); - - vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); - if (IS_ERR(vm_bo)) { - xe_vma_free(vma); - return ERR_CAST(vm_bo); - } - - drm_gpuvm_bo_extobj_add(vm_bo); - drm_gem_object_get(&bo->ttm.base); - vma->gpuva.gem.offset = bo_offset_or_userptr; - drm_gpuva_link(&vma->gpuva, vm_bo); - drm_gpuvm_bo_put(vm_bo); - } else /* userptr or null */ { - if (!is_null) { - struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; - u64 size = end - start + 1; - int err; - - INIT_LIST_HEAD(&userptr->invalidate_link); - INIT_LIST_HEAD(&userptr->repin_link); - vma->gpuva.gem.offset = bo_offset_or_userptr; - - err = mmu_interval_notifier_insert(&userptr->notifier, - current->mm, - xe_vma_userptr(vma), size, - &vma_userptr_notifier_ops); - if (err) { - xe_vma_free(vma); - return ERR_PTR(err); - } - - userptr->notifier_seq = LONG_MAX; - } - - xe_vm_get(vm); - } - - return vma; -} - -static void xe_vma_destroy_late(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - if (vma->ufence) { - xe_sync_ufence_put(vma->ufence); - vma->ufence = NULL; - } - - if (xe_vma_is_userptr(vma)) { - struct xe_userptr_vma *uvma = to_userptr_vma(vma); - struct xe_userptr *userptr = &uvma->userptr; - - if (userptr->sg) - xe_hmm_userptr_free_sg(uvma); - - /* - * Since userptr pages are not pinned, we can't remove - * the notifer until we're sure the GPU is not accessing - * them anymore - */ - mmu_interval_notifier_remove(&userptr->notifier); - xe_vm_put(vm); - } else if (xe_vma_is_null(vma)) { - xe_vm_put(vm); - } else { - xe_bo_put(xe_vma_bo(vma)); - } - - xe_vma_free(vma); -} - -static void vma_destroy_work_func(struct work_struct *w) -{ - struct xe_vma *vma = - container_of(w, struct xe_vma, destroy_work); - - xe_vma_destroy_late(vma); -} - -static void vma_destroy_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); - - INIT_WORK(&vma->destroy_work, vma_destroy_work_func); - queue_work(system_unbound_wq, &vma->destroy_work); -} - -static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held_write(&vm->lock); - xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); - - if (xe_vma_is_userptr(vma)) { - xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); - - spin_lock(&vm->userptr.invalidated_lock); - list_del(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } else if (!xe_vma_is_null(vma)) { - xe_bo_assert_held(xe_vma_bo(vma)); - - drm_gpuva_unlink(&vma->gpuva); - } - - xe_vm_assert_held(vm); - if (fence) { - int ret = dma_fence_add_callback(fence, &vma->destroy_cb, - vma_destroy_cb); - - if (ret) { - XE_WARN_ON(ret != -ENOENT); - xe_vma_destroy_late(vma); - } - } else { - xe_vma_destroy_late(vma); - } -} - -/** - * xe_vm_lock_vma() - drm_exec utility to lock a vma - * @exec: The drm_exec object we're currently locking for. - * @vma: The vma for witch we want to lock the vm resv and any attached - * object's resv. - * - * Return: 0 on success, negative error code on error. In particular - * may return -EDEADLK on WW transaction contention and -EINTR if - * an interruptible wait is terminated by a signal. - */ -int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_bo *bo = xe_vma_bo(vma); - int err; - - XE_WARN_ON(!vm); - - err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); - if (!err && bo && !bo->vm) - err = drm_exec_lock_obj(exec, &bo->ttm.base); - - return err; -} - -static void xe_vma_destroy_unlocked(struct xe_vma *vma) -{ - struct drm_exec exec; - int err; - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - err = xe_vm_lock_vma(&exec, vma); - drm_exec_retry_on_contention(&exec); - if (XE_WARN_ON(err)) - break; - } - - xe_vma_destroy(vma, NULL); - - drm_exec_fini(&exec); -} - -struct xe_vma * -xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) -{ - struct drm_gpuva *gpuva; - - lockdep_assert_held(&vm->lock); - - if (xe_vm_is_closed_or_banned(vm)) - return NULL; - - xe_assert(vm->xe, start + range <= vm->size); - - gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); - - return gpuva ? gpuva_to_vma(gpuva) : NULL; -} - -static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) -{ - int err; - - xe_assert(vm->xe, xe_vma_vm(vma) == vm); - lockdep_assert_held(&vm->lock); - - mutex_lock(&vm->snap_mutex); - err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); - mutex_unlock(&vm->snap_mutex); - XE_WARN_ON(err); /* Shouldn't be possible */ - - return err; -} - -static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) -{ - xe_assert(vm->xe, xe_vma_vm(vma) == vm); - lockdep_assert_held(&vm->lock); - - mutex_lock(&vm->snap_mutex); - drm_gpuva_remove(&vma->gpuva); - mutex_unlock(&vm->snap_mutex); - if (vm->usm.last_fault_vma == vma) - vm->usm.last_fault_vma = NULL; -} - -static struct drm_gpuva_op *xe_vm_op_alloc(void) -{ - struct xe_vma_op *op; - - op = kzalloc(sizeof(*op), GFP_KERNEL); - - if (unlikely(!op)) - return NULL; - - return &op->base; -} - -static void xe_vm_free(struct drm_gpuvm *gpuvm); - -static const struct drm_gpuvm_ops gpuvm_ops = { - .op_alloc = xe_vm_op_alloc, - .vm_bo_validate = xe_gpuvm_validate, - .vm_free = xe_vm_free, -}; - -static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) -{ - u64 pte = 0; - - if (pat_index & BIT(0)) - pte |= XE_PPGTT_PTE_PAT0; - - if (pat_index & BIT(1)) - pte |= XE_PPGTT_PTE_PAT1; - - return pte; -} - -static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index, - u32 pt_level) -{ - u64 pte = 0; - - if (pat_index & BIT(0)) - pte |= XE_PPGTT_PTE_PAT0; - - if (pat_index & BIT(1)) - pte |= XE_PPGTT_PTE_PAT1; - - if (pat_index & BIT(2)) { - if (pt_level) - pte |= XE_PPGTT_PDE_PDPE_PAT2; - else - pte |= XE_PPGTT_PTE_PAT2; - } - - if (pat_index & BIT(3)) - pte |= XELPG_PPGTT_PTE_PAT3; - - if (pat_index & (BIT(4))) - pte |= XE2_PPGTT_PTE_PAT4; - - return pte; -} - -static u64 pte_encode_ps(u32 pt_level) -{ - XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); - - if (pt_level == 1) - return XE_PDE_PS_2M; - else if (pt_level == 2) - return XE_PDPE_PS_1G; - - return 0; -} - -static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, - const u16 pat_index) -{ - struct xe_device *xe = xe_bo_device(bo); - u64 pde; - - pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); - pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - pde |= pde_encode_pat_index(xe, pat_index); - - return pde; -} - -static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, - u16 pat_index, u32 pt_level) -{ - struct xe_device *xe = xe_bo_device(bo); - u64 pte; - - pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); - pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); - pte |= pte_encode_ps(pt_level); - - if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) - pte |= XE_PPGTT_PTE_DM; - - return pte; -} - -static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, - u16 pat_index, u32 pt_level) -{ - struct xe_device *xe = xe_vma_vm(vma)->xe; - - pte |= XE_PAGE_PRESENT; - - if (likely(!xe_vma_read_only(vma))) - pte |= XE_PAGE_RW; - - pte |= pte_encode_pat_index(xe, pat_index, pt_level); - pte |= pte_encode_ps(pt_level); - - if (unlikely(xe_vma_is_null(vma))) - pte |= XE_PTE_NULL; - - return pte; -} - -static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, - u16 pat_index, - u32 pt_level, bool devmem, u64 flags) -{ - u64 pte; - - /* Avoid passing random bits directly as flags */ - xe_assert(xe, !(flags & ~XE_PTE_PS64)); - - pte = addr; - pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); - pte |= pte_encode_ps(pt_level); - - if (devmem) - pte |= XE_PPGTT_PTE_DM; - - pte |= flags; - - return pte; -} - -static const struct xe_pt_ops xelp_pt_ops = { - .pte_encode_bo = xelp_pte_encode_bo, - .pte_encode_vma = xelp_pte_encode_vma, - .pte_encode_addr = xelp_pte_encode_addr, - .pde_encode_bo = xelp_pde_encode_bo, -}; - -static void vm_destroy_work_func(struct work_struct *w); - -/** - * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the - * given tile and vm. - * @xe: xe device. - * @tile: tile to set up for. - * @vm: vm to set up for. - * - * Sets up a pagetable tree with one page-table per level and a single - * leaf PTE. All pagetable entries point to the single page-table or, - * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and - * writes become NOPs. - * - * Return: 0 on success, negative error code on error. - */ -static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm) -{ - u8 id = tile->id; - int i; - - for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { - vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); - if (IS_ERR(vm->scratch_pt[id][i])) - return PTR_ERR(vm->scratch_pt[id][i]); - - xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); - } - - return 0; -} - -static void xe_vm_free_scratch(struct xe_vm *vm) -{ - struct xe_tile *tile; - u8 id; - - if (!xe_vm_has_scratch(vm)) - return; - - for_each_tile(tile, vm->xe, id) { - u32 i; - - if (!vm->pt_root[id]) - continue; - - for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) - if (vm->scratch_pt[id][i]) - xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); - } -} - -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) -{ - struct drm_gem_object *vm_resv_obj; - struct xe_vm *vm; - int err, number_tiles = 0; - struct xe_tile *tile; - u8 id; - - vm = kzalloc(sizeof(*vm), GFP_KERNEL); - if (!vm) - return ERR_PTR(-ENOMEM); - - vm->xe = xe; - - vm->size = 1ull << xe->info.va_bits; - - vm->flags = flags; - - init_rwsem(&vm->lock); - mutex_init(&vm->snap_mutex); - - INIT_LIST_HEAD(&vm->rebind_list); - - INIT_LIST_HEAD(&vm->userptr.repin_list); - INIT_LIST_HEAD(&vm->userptr.invalidated); - init_rwsem(&vm->userptr.notifier_lock); - spin_lock_init(&vm->userptr.invalidated_lock); - - INIT_WORK(&vm->destroy_work, vm_destroy_work_func); - - INIT_LIST_HEAD(&vm->preempt.exec_queues); - vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ - - for_each_tile(tile, xe, id) - xe_range_fence_tree_init(&vm->rftree[id]); - - vm->pt_ops = &xelp_pt_ops; - - /* - * Long-running workloads are not protected by the scheduler references. - * By design, run_job for long-running workloads returns NULL and the - * scheduler drops all the references of it, hence protecting the VM - * for this case is necessary. - */ - if (flags & XE_VM_FLAG_LR_MODE) - xe_pm_runtime_get_noresume(xe); - - vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); - if (!vm_resv_obj) { - err = -ENOMEM; - goto err_no_resv; - } - - drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, - vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); - - drm_gem_object_put(vm_resv_obj); - - err = xe_vm_lock(vm, true); - if (err) - goto err_close; - - if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) - vm->flags |= XE_VM_FLAG_64K; - - for_each_tile(tile, xe, id) { - if (flags & XE_VM_FLAG_MIGRATION && - tile->id != XE_VM_FLAG_TILE_ID(flags)) - continue; - - vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); - if (IS_ERR(vm->pt_root[id])) { - err = PTR_ERR(vm->pt_root[id]); - vm->pt_root[id] = NULL; - goto err_unlock_close; - } - } - - if (xe_vm_has_scratch(vm)) { - for_each_tile(tile, xe, id) { - if (!vm->pt_root[id]) - continue; - - err = xe_vm_create_scratch(xe, tile, vm); - if (err) - goto err_unlock_close; - } - vm->batch_invalidate_tlb = true; - } - - if (vm->flags & XE_VM_FLAG_LR_MODE) { - INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); - vm->batch_invalidate_tlb = false; - } - - /* Fill pt_root after allocating scratch tables */ - for_each_tile(tile, xe, id) { - if (!vm->pt_root[id]) - continue; - - xe_pt_populate_empty(tile, vm, vm->pt_root[id]); - } - xe_vm_unlock(vm); - - /* Kernel migration VM shouldn't have a circular loop.. */ - if (!(flags & XE_VM_FLAG_MIGRATION)) { - for_each_tile(tile, xe, id) { - struct xe_gt *gt = tile->primary_gt; - struct xe_vm *migrate_vm; - struct xe_exec_queue *q; - u32 create_flags = EXEC_QUEUE_FLAG_VM; - - if (!vm->pt_root[id]) - continue; - - migrate_vm = xe_migrate_get_vm(tile->migrate); - q = xe_exec_queue_create_class(xe, gt, migrate_vm, - XE_ENGINE_CLASS_COPY, - create_flags); - xe_vm_put(migrate_vm); - if (IS_ERR(q)) { - err = PTR_ERR(q); - goto err_close; - } - vm->q[id] = q; - number_tiles++; - } - } - - if (number_tiles > 1) - vm->composite_fence_ctx = dma_fence_context_alloc(1); - - mutex_lock(&xe->usm.lock); - if (flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode++; - else if (!(flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode++; - mutex_unlock(&xe->usm.lock); - - trace_xe_vm_create(vm); - - return vm; - -err_unlock_close: - xe_vm_unlock(vm); -err_close: - xe_vm_close_and_put(vm); - return ERR_PTR(err); - -err_no_resv: - mutex_destroy(&vm->snap_mutex); - for_each_tile(tile, xe, id) - xe_range_fence_tree_fini(&vm->rftree[id]); - kfree(vm); - if (flags & XE_VM_FLAG_LR_MODE) - xe_pm_runtime_put(xe); - return ERR_PTR(err); -} - -static void xe_vm_close(struct xe_vm *vm) -{ - down_write(&vm->lock); - vm->size = 0; - up_write(&vm->lock); -} - -void xe_vm_close_and_put(struct xe_vm *vm) -{ - LIST_HEAD(contested); - struct xe_device *xe = vm->xe; - struct xe_tile *tile; - struct xe_vma *vma, *next_vma; - struct drm_gpuva *gpuva, *next; - u8 id; - - xe_assert(xe, !vm->preempt.num_exec_queues); - - xe_vm_close(vm); - if (xe_vm_in_preempt_fence_mode(vm)) - flush_work(&vm->preempt.rebind_work); - - down_write(&vm->lock); - for_each_tile(tile, xe, id) { - if (vm->q[id]) - xe_exec_queue_last_fence_put(vm->q[id], vm); - } - up_write(&vm->lock); - - for_each_tile(tile, xe, id) { - if (vm->q[id]) { - xe_exec_queue_kill(vm->q[id]); - xe_exec_queue_put(vm->q[id]); - vm->q[id] = NULL; - } - } - - down_write(&vm->lock); - xe_vm_lock(vm, false); - drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { - vma = gpuva_to_vma(gpuva); - - if (xe_vma_has_no_bo(vma)) { - down_read(&vm->userptr.notifier_lock); - vma->gpuva.flags |= XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); - } - - xe_vm_remove_vma(vm, vma); - - /* easy case, remove from VMA? */ - if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { - list_del_init(&vma->combined_links.rebind); - xe_vma_destroy(vma, NULL); - continue; - } - - list_move_tail(&vma->combined_links.destroy, &contested); - vma->gpuva.flags |= XE_VMA_DESTROYED; - } - - /* - * All vm operations will add shared fences to resv. - * The only exception is eviction for a shared object, - * but even so, the unbind when evicted would still - * install a fence to resv. Hence it's safe to - * destroy the pagetables immediately. - */ - xe_vm_free_scratch(vm); - - for_each_tile(tile, xe, id) { - if (vm->pt_root[id]) { - xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); - vm->pt_root[id] = NULL; - } - } - xe_vm_unlock(vm); - - /* - * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL - * Since we hold a refcount to the bo, we can remove and free - * the members safely without locking. - */ - list_for_each_entry_safe(vma, next_vma, &contested, - combined_links.destroy) { - list_del_init(&vma->combined_links.destroy); - xe_vma_destroy_unlocked(vma); - } - - up_write(&vm->lock); - - mutex_lock(&xe->usm.lock); - if (vm->flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode--; - else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode--; - - if (vm->usm.asid) { - void *lookup; - - xe_assert(xe, xe->info.has_asid); - xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); - - lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); - xe_assert(xe, lookup == vm); - } - mutex_unlock(&xe->usm.lock); - - for_each_tile(tile, xe, id) - xe_range_fence_tree_fini(&vm->rftree[id]); - - xe_vm_put(vm); -} - -static void vm_destroy_work_func(struct work_struct *w) -{ - struct xe_vm *vm = - container_of(w, struct xe_vm, destroy_work); - struct xe_device *xe = vm->xe; - struct xe_tile *tile; - u8 id; - - /* xe_vm_close_and_put was not called? */ - xe_assert(xe, !vm->size); - - if (xe_vm_in_preempt_fence_mode(vm)) - flush_work(&vm->preempt.rebind_work); - - mutex_destroy(&vm->snap_mutex); - - if (vm->flags & XE_VM_FLAG_LR_MODE) - xe_pm_runtime_put(xe); - - for_each_tile(tile, xe, id) - XE_WARN_ON(vm->pt_root[id]); - - trace_xe_vm_free(vm); - - if (vm->xef) - xe_file_put(vm->xef); - - kfree(vm); -} - -static void xe_vm_free(struct drm_gpuvm *gpuvm) -{ - struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); - - /* To destroy the VM we need to be able to sleep */ - queue_work(system_unbound_wq, &vm->destroy_work); -} - -struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) -{ - struct xe_vm *vm; - - mutex_lock(&xef->vm.lock); - vm = xa_load(&xef->vm.xa, id); - if (vm) - xe_vm_get(vm); - mutex_unlock(&xef->vm.lock); - - return vm; -} - -u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) -{ - return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, - tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); -} - -static struct xe_exec_queue * -to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) -{ - return q ? q : vm->q[0]; -} - -static struct xe_user_fence * -find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) -{ - unsigned int i; - - for (i = 0; i < num_syncs; i++) { - struct xe_sync_entry *e = &syncs[i]; - - if (xe_sync_is_ufence(e)) - return xe_sync_ufence_get(e); - } - - return NULL; -} - -#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ - DRM_XE_VM_CREATE_FLAG_LR_MODE | \ - DRM_XE_VM_CREATE_FLAG_FAULT_MODE) - -int xe_vm_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_vm_create *args = data; - struct xe_tile *tile; - struct xe_vm *vm; - u32 id, asid; - int err; - u32 flags = 0; - - if (XE_IOCTL_DBG(xe, args->extensions)) - return -EINVAL; - - if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) - args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; - - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && - !xe->info.has_usm)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && - args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && - args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && - xe_device_in_non_fault_mode(xe))) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && - xe_device_in_fault_mode(xe))) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->extensions)) - return -EINVAL; - - if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) - flags |= XE_VM_FLAG_SCRATCH_PAGE; - if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) - flags |= XE_VM_FLAG_LR_MODE; - if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) - flags |= XE_VM_FLAG_FAULT_MODE; - - vm = xe_vm_create(xe, flags); - if (IS_ERR(vm)) - return PTR_ERR(vm); - - mutex_lock(&xef->vm.lock); - err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->vm.lock); - if (err) - goto err_close_and_put; - - if (xe->info.has_asid) { - mutex_lock(&xe->usm.lock); - err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, - XA_LIMIT(1, XE_MAX_ASID - 1), - &xe->usm.next_asid, GFP_KERNEL); - mutex_unlock(&xe->usm.lock); - if (err < 0) - goto err_free_id; - - vm->usm.asid = asid; - } - - args->vm_id = id; - vm->xef = xe_file_get(xef); - - /* Record BO memory for VM pagetable created against client */ - for_each_tile(tile, xe, id) - if (vm->pt_root[id]) - xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) - /* Warning: Security issue - never enable by default */ - args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); -#endif - - return 0; - -err_free_id: - mutex_lock(&xef->vm.lock); - xa_erase(&xef->vm.xa, id); - mutex_unlock(&xef->vm.lock); -err_close_and_put: - xe_vm_close_and_put(vm); - - return err; -} - -int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_vm_destroy *args = data; - struct xe_vm *vm; - int err = 0; - - if (XE_IOCTL_DBG(xe, args->pad) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - mutex_lock(&xef->vm.lock); - vm = xa_load(&xef->vm.xa, args->vm_id); - if (XE_IOCTL_DBG(xe, !vm)) - err = -ENOENT; - else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) - err = -EBUSY; - else - xa_erase(&xef->vm.xa, args->vm_id); - mutex_unlock(&xef->vm.lock); - - if (!err) - xe_vm_close_and_put(vm); - - return err; -} - -static const u32 region_to_mem_type[] = { - XE_PL_TT, - XE_PL_VRAM0, - XE_PL_VRAM1, -}; - -static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, - bool post_commit) -{ - down_read(&vm->userptr.notifier_lock); - vma->gpuva.flags |= XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); - if (post_commit) - xe_vm_remove_vma(vm, vma); -} - -#undef ULL -#define ULL unsigned long long - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) -{ - struct xe_vma *vma; - - switch (op->op) { - case DRM_GPUVA_OP_MAP: - vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", - (ULL)op->map.va.addr, (ULL)op->map.va.range); - break; - case DRM_GPUVA_OP_REMAP: - vma = gpuva_to_vma(op->remap.unmap->va); - vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", - (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), - op->remap.unmap->keep ? 1 : 0); - if (op->remap.prev) - vm_dbg(&xe->drm, - "REMAP:PREV: addr=0x%016llx, range=0x%016llx", - (ULL)op->remap.prev->va.addr, - (ULL)op->remap.prev->va.range); - if (op->remap.next) - vm_dbg(&xe->drm, - "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", - (ULL)op->remap.next->va.addr, - (ULL)op->remap.next->va.range); - break; - case DRM_GPUVA_OP_UNMAP: - vma = gpuva_to_vma(op->unmap.va); - vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", - (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), - op->unmap.keep ? 1 : 0); - break; - case DRM_GPUVA_OP_PREFETCH: - vma = gpuva_to_vma(op->prefetch.va); - vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", - (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); - break; - default: - drm_warn(&xe->drm, "NOT POSSIBLE"); - } -} -#else -static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) -{ -} -#endif - -/* - * Create operations list from IOCTL arguments, setup operations fields so parse - * and commit steps are decoupled from IOCTL arguments. This step can fail. - */ -static struct drm_gpuva_ops * -vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, - u64 bo_offset_or_userptr, u64 addr, u64 range, - u32 operation, u32 flags, - u32 prefetch_region, u16 pat_index) -{ - struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; - struct drm_gpuva_ops *ops; - struct drm_gpuva_op *__op; - struct drm_gpuvm_bo *vm_bo; - int err; - - lockdep_assert_held_write(&vm->lock); - - vm_dbg(&vm->xe->drm, - "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", - operation, (ULL)addr, (ULL)range, - (ULL)bo_offset_or_userptr); - - switch (operation) { - case DRM_XE_VM_BIND_OP_MAP: - case DRM_XE_VM_BIND_OP_MAP_USERPTR: - ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, - obj, bo_offset_or_userptr); - break; - case DRM_XE_VM_BIND_OP_UNMAP: - ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); - break; - case DRM_XE_VM_BIND_OP_PREFETCH: - ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); - break; - case DRM_XE_VM_BIND_OP_UNMAP_ALL: - xe_assert(vm->xe, bo); - - err = xe_bo_lock(bo, true); - if (err) - return ERR_PTR(err); - - vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); - if (IS_ERR(vm_bo)) { - xe_bo_unlock(bo); - return ERR_CAST(vm_bo); - } - - ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); - drm_gpuvm_bo_put(vm_bo); - xe_bo_unlock(bo); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - ops = ERR_PTR(-EINVAL); - } - if (IS_ERR(ops)) - return ops; - - drm_gpuva_for_each_op(__op, ops) { - struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - - if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.immediate = - flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; - op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; - op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; - op->map.pat_index = pat_index; - } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { - op->prefetch.region = prefetch_region; - } - - print_op(vm->xe, __op); - } - - return ops; -} - -static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, - u16 pat_index, unsigned int flags) -{ - struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL; - struct drm_exec exec; - struct xe_vma *vma; - int err = 0; - - lockdep_assert_held_write(&vm->lock); - - if (bo) { - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); - drm_exec_until_all_locked(&exec) { - err = 0; - if (!bo->vm) { - err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); - drm_exec_retry_on_contention(&exec); - } - if (!err) { - err = drm_exec_lock_obj(&exec, &bo->ttm.base); - drm_exec_retry_on_contention(&exec); - } - if (err) { - drm_exec_fini(&exec); - return ERR_PTR(err); - } - } - } - vma = xe_vma_create(vm, bo, op->gem.offset, - op->va.addr, op->va.addr + - op->va.range - 1, pat_index, flags); - if (IS_ERR(vma)) - goto err_unlock; - - if (xe_vma_is_userptr(vma)) - err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); - else if (!xe_vma_has_no_bo(vma) && !bo->vm) - err = add_preempt_fences(vm, bo); - -err_unlock: - if (bo) - drm_exec_fini(&exec); - - if (err) { - prep_vma_destroy(vm, vma, false); - xe_vma_destroy_unlocked(vma); - vma = ERR_PTR(err); - } - - return vma; -} - -static u64 xe_vma_max_pte_size(struct xe_vma *vma) -{ - if (vma->gpuva.flags & XE_VMA_PTE_1G) - return SZ_1G; - else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) - return SZ_2M; - else if (vma->gpuva.flags & XE_VMA_PTE_64K) - return SZ_64K; - else if (vma->gpuva.flags & XE_VMA_PTE_4K) - return SZ_4K; - - return SZ_1G; /* Uninitialized, used max size */ -} - -static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) -{ - switch (size) { - case SZ_1G: - vma->gpuva.flags |= XE_VMA_PTE_1G; - break; - case SZ_2M: - vma->gpuva.flags |= XE_VMA_PTE_2M; - break; - case SZ_64K: - vma->gpuva.flags |= XE_VMA_PTE_64K; - break; - case SZ_4K: - vma->gpuva.flags |= XE_VMA_PTE_4K; - break; - } -} - -static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) -{ - int err = 0; - - lockdep_assert_held_write(&vm->lock); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - err |= xe_vm_insert_vma(vm, op->map.vma); - if (!err) - op->flags |= XE_VMA_OP_COMMITTED; - break; - case DRM_GPUVA_OP_REMAP: - { - u8 tile_present = - gpuva_to_vma(op->base.remap.unmap->va)->tile_present; - - prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), - true); - op->flags |= XE_VMA_OP_COMMITTED; - - if (op->remap.prev) { - err |= xe_vm_insert_vma(vm, op->remap.prev); - if (!err) - op->flags |= XE_VMA_OP_PREV_COMMITTED; - if (!err && op->remap.skip_prev) { - op->remap.prev->tile_present = - tile_present; - op->remap.prev = NULL; - } - } - if (op->remap.next) { - err |= xe_vm_insert_vma(vm, op->remap.next); - if (!err) - op->flags |= XE_VMA_OP_NEXT_COMMITTED; - if (!err && op->remap.skip_next) { - op->remap.next->tile_present = - tile_present; - op->remap.next = NULL; - } - } - - /* Adjust for partial unbind after removin VMA from VM */ - if (!err) { - op->base.remap.unmap->va->va.addr = op->remap.start; - op->base.remap.unmap->va->va.range = op->remap.range; - } - break; - } - case DRM_GPUVA_OP_UNMAP: - prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); - op->flags |= XE_VMA_OP_COMMITTED; - break; - case DRM_GPUVA_OP_PREFETCH: - op->flags |= XE_VMA_OP_COMMITTED; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, - struct xe_vma_ops *vops) -{ - struct xe_device *xe = vm->xe; - struct drm_gpuva_op *__op; - struct xe_tile *tile; - u8 id, tile_mask = 0; - int err = 0; - - lockdep_assert_held_write(&vm->lock); - - for_each_tile(tile, vm->xe, id) - tile_mask |= 0x1 << id; - - drm_gpuva_for_each_op(__op, ops) { - struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - struct xe_vma *vma; - unsigned int flags = 0; - - INIT_LIST_HEAD(&op->link); - list_add_tail(&op->link, &vops->list); - op->tile_mask = tile_mask; - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - { - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->map.is_null ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->map.dumpable ? - VMA_CREATE_FLAG_DUMPABLE : 0; - - vma = new_vma(vm, &op->base.map, op->map.pat_index, - flags); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - op->map.vma = vma; - if (op->map.immediate || !xe_vm_in_fault_mode(vm)) - xe_vma_ops_incr_pt_update_ops(vops, - op->tile_mask); - break; - } - case DRM_GPUVA_OP_REMAP: - { - struct xe_vma *old = - gpuva_to_vma(op->base.remap.unmap->va); - - op->remap.start = xe_vma_start(old); - op->remap.range = xe_vma_size(old); - - if (op->base.remap.prev) { - flags |= op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->base.remap.unmap->va->flags & - XE_VMA_DUMPABLE ? - VMA_CREATE_FLAG_DUMPABLE : 0; - - vma = new_vma(vm, op->base.remap.prev, - old->pat_index, flags); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - op->remap.prev = vma; - - /* - * Userptr creates a new SG mapping so - * we must also rebind. - */ - op->remap.skip_prev = !xe_vma_is_userptr(old) && - IS_ALIGNED(xe_vma_end(vma), - xe_vma_max_pte_size(old)); - if (op->remap.skip_prev) { - xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); - op->remap.range -= - xe_vma_end(vma) - - xe_vma_start(old); - op->remap.start = xe_vma_end(vma); - vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", - (ULL)op->remap.start, - (ULL)op->remap.range); - } else { - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); - } - } - - if (op->base.remap.next) { - flags |= op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->base.remap.unmap->va->flags & - XE_VMA_DUMPABLE ? - VMA_CREATE_FLAG_DUMPABLE : 0; - - vma = new_vma(vm, op->base.remap.next, - old->pat_index, flags); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - op->remap.next = vma; - - /* - * Userptr creates a new SG mapping so - * we must also rebind. - */ - op->remap.skip_next = !xe_vma_is_userptr(old) && - IS_ALIGNED(xe_vma_start(vma), - xe_vma_max_pte_size(old)); - if (op->remap.skip_next) { - xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); - op->remap.range -= - xe_vma_end(old) - - xe_vma_start(vma); - vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", - (ULL)op->remap.start, - (ULL)op->remap.range); - } else { - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); - } - } - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); - break; - } - case DRM_GPUVA_OP_UNMAP: - case DRM_GPUVA_OP_PREFETCH: - /* FIXME: Need to skip some prefetch ops */ - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - err = xe_vma_op_commit(vm, op); - if (err) - return err; - } - - return 0; -} - -static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, - bool post_commit, bool prev_post_commit, - bool next_post_commit) -{ - lockdep_assert_held_write(&vm->lock); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (op->map.vma) { - prep_vma_destroy(vm, op->map.vma, post_commit); - xe_vma_destroy_unlocked(op->map.vma); - } - break; - case DRM_GPUVA_OP_UNMAP: - { - struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); - - if (vma) { - down_read(&vm->userptr.notifier_lock); - vma->gpuva.flags &= ~XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); - if (post_commit) - xe_vm_insert_vma(vm, vma); - } - break; - } - case DRM_GPUVA_OP_REMAP: - { - struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); - - if (op->remap.prev) { - prep_vma_destroy(vm, op->remap.prev, prev_post_commit); - xe_vma_destroy_unlocked(op->remap.prev); - } - if (op->remap.next) { - prep_vma_destroy(vm, op->remap.next, next_post_commit); - xe_vma_destroy_unlocked(op->remap.next); - } - if (vma) { - down_read(&vm->userptr.notifier_lock); - vma->gpuva.flags &= ~XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); - if (post_commit) - xe_vm_insert_vma(vm, vma); - } - break; - } - case DRM_GPUVA_OP_PREFETCH: - /* Nothing to do */ - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, - struct drm_gpuva_ops **ops, - int num_ops_list) -{ - int i; - - for (i = num_ops_list - 1; i >= 0; --i) { - struct drm_gpuva_ops *__ops = ops[i]; - struct drm_gpuva_op *__op; - - if (!__ops) - continue; - - drm_gpuva_for_each_op_reverse(__op, __ops) { - struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - - xe_vma_op_unwind(vm, op, - op->flags & XE_VMA_OP_COMMITTED, - op->flags & XE_VMA_OP_PREV_COMMITTED, - op->flags & XE_VMA_OP_NEXT_COMMITTED); - } - } -} - -static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, - bool validate) -{ - struct xe_bo *bo = xe_vma_bo(vma); - int err = 0; - - if (bo) { - if (!bo->vm) - err = drm_exec_lock_obj(exec, &bo->ttm.base); - if (!err && validate) - err = xe_bo_validate(bo, xe_vma_vm(vma), true); - } - - return err; -} - -static int check_ufence(struct xe_vma *vma) -{ - if (vma->ufence) { - struct xe_user_fence * const f = vma->ufence; - - if (!xe_sync_ufence_get_status(f)) - return -EBUSY; - - vma->ufence = NULL; - xe_sync_ufence_put(f); - } - - return 0; -} - -static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, - struct xe_vma_op *op) -{ - int err = 0; - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - err = vma_lock_and_validate(exec, op->map.vma, - !xe_vm_in_fault_mode(vm) || - op->map.immediate); - break; - case DRM_GPUVA_OP_REMAP: - err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); - if (err) - break; - - err = vma_lock_and_validate(exec, - gpuva_to_vma(op->base.remap.unmap->va), - false); - if (!err && op->remap.prev) - err = vma_lock_and_validate(exec, op->remap.prev, true); - if (!err && op->remap.next) - err = vma_lock_and_validate(exec, op->remap.next, true); - break; - case DRM_GPUVA_OP_UNMAP: - err = check_ufence(gpuva_to_vma(op->base.unmap.va)); - if (err) - break; - - err = vma_lock_and_validate(exec, - gpuva_to_vma(op->base.unmap.va), - false); - break; - case DRM_GPUVA_OP_PREFETCH: - { - struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); - u32 region = op->prefetch.region; - - xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); - - err = vma_lock_and_validate(exec, - gpuva_to_vma(op->base.prefetch.va), - false); - if (!err && !xe_vma_has_no_bo(vma)) - err = xe_bo_migrate(xe_vma_bo(vma), - region_to_mem_type[region]); - break; - } - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, - struct xe_vm *vm, - struct xe_vma_ops *vops) -{ - struct xe_vma_op *op; - int err; - - err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_lock_and_prep(exec, vm, op); - if (err) - return err; - } - - return 0; -} - -static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) -{ - struct xe_exec_queue *q = vops->q; - struct xe_tile *tile; - int number_tiles = 0; - u8 id; - - for_each_tile(tile, vm->xe, id) { - if (vops->pt_update_ops[id].num_ops) - ++number_tiles; - - if (vops->pt_update_ops[id].q) - continue; - - if (q) { - vops->pt_update_ops[id].q = q; - if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) - q = list_next_entry(q, multi_gt_list); - } else { - vops->pt_update_ops[id].q = vm->q[id]; - } - } - - return number_tiles; -} - -static struct dma_fence *ops_execute(struct xe_vm *vm, - struct xe_vma_ops *vops) -{ - struct xe_tile *tile; - struct dma_fence *fence = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; - int number_tiles = 0, current_fence = 0, err; - u8 id; - - number_tiles = vm_ops_setup_tile_args(vm, vops); - if (number_tiles == 0) - return ERR_PTR(-ENODATA); - - if (number_tiles > 1) { - fences = kmalloc_array(number_tiles, sizeof(*fences), - GFP_KERNEL); - if (!fences) - return ERR_PTR(-ENOMEM); - } - - for_each_tile(tile, vm->xe, id) { - if (!vops->pt_update_ops[id].num_ops) - continue; - - err = xe_pt_update_ops_prepare(tile, vops); - if (err) { - fence = ERR_PTR(err); - goto err_out; - } - } - - for_each_tile(tile, vm->xe, id) { - if (!vops->pt_update_ops[id].num_ops) - continue; - - fence = xe_pt_update_ops_run(tile, vops); - if (IS_ERR(fence)) - goto err_out; - - if (fences) - fences[current_fence++] = fence; - } - - if (fences) { - cf = dma_fence_array_create(number_tiles, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - if (!cf) { - --vm->composite_fence_seqno; - fence = ERR_PTR(-ENOMEM); - goto err_out; - } - fence = &cf->base; - } - - for_each_tile(tile, vm->xe, id) { - if (!vops->pt_update_ops[id].num_ops) - continue; - - xe_pt_update_ops_fini(tile, vops); - } - - return fence; - -err_out: - for_each_tile(tile, vm->xe, id) { - if (!vops->pt_update_ops[id].num_ops) - continue; - - xe_pt_update_ops_abort(tile, vops); - } - while (current_fence) - dma_fence_put(fences[--current_fence]); - kfree(fences); - kfree(cf); - - return fence; -} - -static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) -{ - if (vma->ufence) - xe_sync_ufence_put(vma->ufence); - vma->ufence = __xe_sync_ufence_get(ufence); -} - -static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_user_fence *ufence) -{ - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - vma_add_ufence(op->map.vma, ufence); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - vma_add_ufence(op->remap.prev, ufence); - if (op->remap.next) - vma_add_ufence(op->remap.next, ufence); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, - struct dma_fence *fence) -{ - struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); - struct xe_user_fence *ufence; - struct xe_vma_op *op; - int i; - - ufence = find_ufence_get(vops->syncs, vops->num_syncs); - list_for_each_entry(op, &vops->list, link) { - if (ufence) - op_add_ufence(vm, op, ufence); - - if (op->base.op == DRM_GPUVA_OP_UNMAP) - xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); - else if (op->base.op == DRM_GPUVA_OP_REMAP) - xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), - fence); - } - if (ufence) - xe_sync_ufence_put(ufence); - for (i = 0; i < vops->num_syncs; i++) - xe_sync_entry_signal(vops->syncs + i, fence); - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - dma_fence_put(fence); -} - -static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, - struct xe_vma_ops *vops) -{ - struct drm_exec exec; - struct dma_fence *fence; - int err; - - lockdep_assert_held_write(&vm->lock); - - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES, 0); - drm_exec_until_all_locked(&exec) { - err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); - drm_exec_retry_on_contention(&exec); - if (err) - goto unlock; - - fence = ops_execute(vm, vops); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto unlock; - } - - vm_bind_ioctl_ops_fini(vm, vops, fence); - } - -unlock: - drm_exec_fini(&exec); - return err; -} - -#define SUPPORTED_FLAGS \ - (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ - DRM_XE_VM_BIND_FLAG_NULL | \ - DRM_XE_VM_BIND_FLAG_DUMPABLE) -#define XE_64K_PAGE_MASK 0xffffull -#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) - -static int vm_bind_ioctl_check_args(struct xe_device *xe, - struct drm_xe_vm_bind *args, - struct drm_xe_vm_bind_op **bind_ops) -{ - int err; - int i; - - if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->extensions)) - return -EINVAL; - - if (args->num_binds > 1) { - u64 __user *bind_user = - u64_to_user_ptr(args->vector_of_binds); - - *bind_ops = kvmalloc_array(args->num_binds, - sizeof(struct drm_xe_vm_bind_op), - GFP_KERNEL | __GFP_ACCOUNT); - if (!*bind_ops) - return -ENOMEM; - - err = __copy_from_user(*bind_ops, bind_user, - sizeof(struct drm_xe_vm_bind_op) * - args->num_binds); - if (XE_IOCTL_DBG(xe, err)) { - err = -EFAULT; - goto free_bind_ops; - } - } else { - *bind_ops = &args->bind; - } - - for (i = 0; i < args->num_binds; ++i) { - u64 range = (*bind_ops)[i].range; - u64 addr = (*bind_ops)[i].addr; - u32 op = (*bind_ops)[i].op; - u32 flags = (*bind_ops)[i].flags; - u32 obj = (*bind_ops)[i].obj; - u64 obj_offset = (*bind_ops)[i].obj_offset; - u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; - bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; - u16 pat_index = (*bind_ops)[i].pat_index; - u16 coh_mode; - - if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { - err = -EINVAL; - goto free_bind_ops; - } - - pat_index = array_index_nospec(pat_index, xe->pat.n_entries); - (*bind_ops)[i].pat_index = pat_index; - coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); - if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ - err = -EINVAL; - goto free_bind_ops; - } - - if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { - err = -EINVAL; - goto free_bind_ops; - } - - if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || - XE_IOCTL_DBG(xe, obj && is_null) || - XE_IOCTL_DBG(xe, obj_offset && is_null) || - XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && - is_null) || - XE_IOCTL_DBG(xe, !obj && - op == DRM_XE_VM_BIND_OP_MAP && - !is_null) || - XE_IOCTL_DBG(xe, !obj && - op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || - XE_IOCTL_DBG(xe, addr && - op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || - XE_IOCTL_DBG(xe, range && - op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || - XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || - XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && - op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || - XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_DBG(xe, prefetch_region && - op != DRM_XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & - xe->info.mem_region_mask)) || - XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_UNMAP)) { - err = -EINVAL; - goto free_bind_ops; - } - - if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || - XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || - XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || - XE_IOCTL_DBG(xe, !range && - op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { - err = -EINVAL; - goto free_bind_ops; - } - } - - return 0; - -free_bind_ops: - if (args->num_binds > 1) - kvfree(*bind_ops); - return err; -} - -static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, - struct xe_exec_queue *q, - struct xe_sync_entry *syncs, - int num_syncs) -{ - struct dma_fence *fence; - int i, err = 0; - - fence = xe_sync_in_fence_get(syncs, num_syncs, - to_wait_exec_queue(vm, q), vm); - if (IS_ERR(fence)) - return PTR_ERR(fence); - - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); - - xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, - fence); - dma_fence_put(fence); - - return err; -} - -static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, - struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs) -{ - memset(vops, 0, sizeof(*vops)); - INIT_LIST_HEAD(&vops->list); - vops->vm = vm; - vops->q = q; - vops->syncs = syncs; - vops->num_syncs = num_syncs; -} - -static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, - u64 addr, u64 range, u64 obj_offset, - u16 pat_index) -{ - u16 coh_mode; - - if (XE_IOCTL_DBG(xe, range > bo->size) || - XE_IOCTL_DBG(xe, obj_offset > - bo->size - range)) { - return -EINVAL; - } - - if (bo->flags & XE_BO_FLAG_INTERNAL_64K) { - if (XE_IOCTL_DBG(xe, obj_offset & - XE_64K_PAGE_MASK) || - XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || - XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { - return -EINVAL; - } - } - - coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); - if (bo->cpu_caching) { - if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && - bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { - return -EINVAL; - } - } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { - /* - * Imported dma-buf from a different device should - * require 1way or 2way coherency since we don't know - * how it was mapped on the CPU. Just assume is it - * potentially cached on CPU side. - */ - return -EINVAL; - } - - return 0; -} - -int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_vm_bind *args = data; - struct drm_xe_sync __user *syncs_user; - struct xe_bo **bos = NULL; - struct drm_gpuva_ops **ops = NULL; - struct xe_vm *vm; - struct xe_exec_queue *q = NULL; - u32 num_syncs, num_ufence = 0; - struct xe_sync_entry *syncs = NULL; - struct drm_xe_vm_bind_op *bind_ops; - struct xe_vma_ops vops; - int err; - int i; - - err = vm_bind_ioctl_check_args(xe, args, &bind_ops); - if (err) - return err; - - if (args->exec_queue_id) { - q = xe_exec_queue_lookup(xef, args->exec_queue_id); - if (XE_IOCTL_DBG(xe, !q)) { - err = -ENOENT; - goto free_objs; - } - - if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { - err = -EINVAL; - goto put_exec_queue; - } - } - - vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_DBG(xe, !vm)) { - err = -EINVAL; - goto put_exec_queue; - } - - err = down_write_killable(&vm->lock); - if (err) - goto put_vm; - - if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { - err = -ENOENT; - goto release_vm_lock; - } - - for (i = 0; i < args->num_binds; ++i) { - u64 range = bind_ops[i].range; - u64 addr = bind_ops[i].addr; - - if (XE_IOCTL_DBG(xe, range > vm->size) || - XE_IOCTL_DBG(xe, addr > vm->size - range)) { - err = -EINVAL; - goto release_vm_lock; - } - } - - if (args->num_binds) { - bos = kvcalloc(args->num_binds, sizeof(*bos), - GFP_KERNEL | __GFP_ACCOUNT); - if (!bos) { - err = -ENOMEM; - goto release_vm_lock; - } - - ops = kvcalloc(args->num_binds, sizeof(*ops), - GFP_KERNEL | __GFP_ACCOUNT); - if (!ops) { - err = -ENOMEM; - goto release_vm_lock; - } - } - - for (i = 0; i < args->num_binds; ++i) { - struct drm_gem_object *gem_obj; - u64 range = bind_ops[i].range; - u64 addr = bind_ops[i].addr; - u32 obj = bind_ops[i].obj; - u64 obj_offset = bind_ops[i].obj_offset; - u16 pat_index = bind_ops[i].pat_index; - - if (!obj) - continue; - - gem_obj = drm_gem_object_lookup(file, obj); - if (XE_IOCTL_DBG(xe, !gem_obj)) { - err = -ENOENT; - goto put_obj; - } - bos[i] = gem_to_xe_bo(gem_obj); - - err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, - obj_offset, pat_index); - if (err) - goto put_obj; - } - - if (args->num_syncs) { - syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); - if (!syncs) { - err = -ENOMEM; - goto put_obj; - } - } - - syncs_user = u64_to_user_ptr(args->syncs); - for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { - err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], - &syncs_user[num_syncs], - (xe_vm_in_lr_mode(vm) ? - SYNC_PARSE_FLAG_LR_MODE : 0) | - (!args->num_binds ? - SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); - if (err) - goto free_syncs; - - if (xe_sync_is_ufence(&syncs[num_syncs])) - num_ufence++; - } - - if (XE_IOCTL_DBG(xe, num_ufence > 1)) { - err = -EINVAL; - goto free_syncs; - } - - if (!args->num_binds) { - err = -ENODATA; - goto free_syncs; - } - - xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); - for (i = 0; i < args->num_binds; ++i) { - u64 range = bind_ops[i].range; - u64 addr = bind_ops[i].addr; - u32 op = bind_ops[i].op; - u32 flags = bind_ops[i].flags; - u64 obj_offset = bind_ops[i].obj_offset; - u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; - u16 pat_index = bind_ops[i].pat_index; - - ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, - addr, range, op, flags, - prefetch_region, pat_index); - if (IS_ERR(ops[i])) { - err = PTR_ERR(ops[i]); - ops[i] = NULL; - goto unwind_ops; - } - - err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); - if (err) - goto unwind_ops; - } - - /* Nothing to do */ - if (list_empty(&vops.list)) { - err = -ENODATA; - goto unwind_ops; - } - - err = xe_vma_ops_alloc(&vops); - if (err) - goto unwind_ops; - - err = vm_bind_ioctl_ops_execute(vm, &vops); - -unwind_ops: - if (err && err != -ENODATA) - vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); - xe_vma_ops_fini(&vops); - for (i = args->num_binds - 1; i >= 0; --i) - if (ops[i]) - drm_gpuva_ops_free(&vm->gpuvm, ops[i]); -free_syncs: - if (err == -ENODATA) - err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); - while (num_syncs--) - xe_sync_entry_cleanup(&syncs[num_syncs]); - - kfree(syncs); -put_obj: - for (i = 0; i < args->num_binds; ++i) - xe_bo_put(bos[i]); -release_vm_lock: - up_write(&vm->lock); -put_vm: - xe_vm_put(vm); -put_exec_queue: - if (q) - xe_exec_queue_put(q); -free_objs: - kvfree(bos); - kvfree(ops); - if (args->num_binds > 1) - kvfree(bind_ops); - return err; -} - -/** - * xe_vm_lock() - Lock the vm's dma_resv object - * @vm: The struct xe_vm whose lock is to be locked - * @intr: Whether to perform any wait interruptible - * - * Return: 0 on success, -EINTR if @intr is true and the wait for a - * contended lock was interrupted. If @intr is false, the function - * always returns 0. - */ -int xe_vm_lock(struct xe_vm *vm, bool intr) -{ - if (intr) - return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); - - return dma_resv_lock(xe_vm_resv(vm), NULL); -} - -/** - * xe_vm_unlock() - Unlock the vm's dma_resv object - * @vm: The struct xe_vm whose lock is to be released. - * - * Unlock a buffer object lock that was locked by xe_vm_lock(). - */ -void xe_vm_unlock(struct xe_vm *vm) -{ - dma_resv_unlock(xe_vm_resv(vm)); -} - -/** - * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock - * @vma: VMA to invalidate - * - * Walks a list of page tables leaves which it memset the entries owned by this - * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is - * complete. - * - * Returns 0 for success, negative error code otherwise. - */ -int xe_vm_invalidate_vma(struct xe_vma *vma) -{ - struct xe_device *xe = xe_vma_vm(vma)->xe; - struct xe_tile *tile; - struct xe_gt_tlb_invalidation_fence - fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; - u8 id; - u32 fence_id = 0; - int ret = 0; - - xe_assert(xe, !xe_vma_is_null(vma)); - trace_xe_vma_invalidate(vma); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "INVALIDATE: addr=0x%016llx, range=0x%016llx", - xe_vma_start(vma), xe_vma_size(vma)); - - /* Check that we don't race with page-table updates */ - if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { - if (xe_vma_is_userptr(vma)) { - WARN_ON_ONCE(!mmu_interval_check_retry - (&to_userptr_vma(vma)->userptr.notifier, - to_userptr_vma(vma)->userptr.notifier_seq)); - WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), - DMA_RESV_USAGE_BOOKKEEP)); - - } else { - xe_bo_assert_held(xe_vma_bo(vma)); - } - } - - for_each_tile(tile, xe, id) { - if (xe_pt_zap_ptes(tile, vma)) { - xe_device_wmb(xe); - xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[fence_id], - true); - - ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, - &fence[fence_id], vma); - if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); - goto wait; - } - ++fence_id; - - if (!tile->media_gt) - continue; - - xe_gt_tlb_invalidation_fence_init(tile->media_gt, - &fence[fence_id], - true); - - ret = xe_gt_tlb_invalidation_vma(tile->media_gt, - &fence[fence_id], vma); - if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); - goto wait; - } - ++fence_id; - } - } - -wait: - for (id = 0; id < fence_id; ++id) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); - - vma->tile_invalidated = vma->tile_mask; - - return ret; -} - -struct xe_vm_snapshot { - unsigned long num_snaps; - struct { - u64 ofs, bo_ofs; - unsigned long len; - struct xe_bo *bo; - void *data; - struct mm_struct *mm; - } snap[]; -}; - -struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) -{ - unsigned long num_snaps = 0, i; - struct xe_vm_snapshot *snap = NULL; - struct drm_gpuva *gpuva; - - if (!vm) - return NULL; - - mutex_lock(&vm->snap_mutex); - drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { - if (gpuva->flags & XE_VMA_DUMPABLE) - num_snaps++; - } - - if (num_snaps) - snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); - if (!snap) { - snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); - goto out_unlock; - } - - snap->num_snaps = num_snaps; - i = 0; - drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { - struct xe_vma *vma = gpuva_to_vma(gpuva); - struct xe_bo *bo = vma->gpuva.gem.obj ? - gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; - - if (!(gpuva->flags & XE_VMA_DUMPABLE)) - continue; - - snap->snap[i].ofs = xe_vma_start(vma); - snap->snap[i].len = xe_vma_size(vma); - if (bo) { - snap->snap[i].bo = xe_bo_get(bo); - snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); - } else if (xe_vma_is_userptr(vma)) { - struct mm_struct *mm = - to_userptr_vma(vma)->userptr.notifier.mm; - - if (mmget_not_zero(mm)) - snap->snap[i].mm = mm; - else - snap->snap[i].data = ERR_PTR(-EFAULT); - - snap->snap[i].bo_ofs = xe_vma_userptr(vma); - } else { - snap->snap[i].data = ERR_PTR(-ENOENT); - } - i++; - } - -out_unlock: - mutex_unlock(&vm->snap_mutex); - return snap; -} - -void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) -{ - if (IS_ERR_OR_NULL(snap)) - return; - - for (int i = 0; i < snap->num_snaps; i++) { - struct xe_bo *bo = snap->snap[i].bo; - struct iosys_map src; - int err; - - if (IS_ERR(snap->snap[i].data)) - continue; - - snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); - if (!snap->snap[i].data) { - snap->snap[i].data = ERR_PTR(-ENOMEM); - goto cleanup_bo; - } - - if (bo) { - xe_bo_lock(bo, false); - err = ttm_bo_vmap(&bo->ttm, &src); - if (!err) { - xe_map_memcpy_from(xe_bo_device(bo), - snap->snap[i].data, - &src, snap->snap[i].bo_ofs, - snap->snap[i].len); - ttm_bo_vunmap(&bo->ttm, &src); - } - xe_bo_unlock(bo); - } else { - void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; - - kthread_use_mm(snap->snap[i].mm); - if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) - err = 0; - else - err = -EFAULT; - kthread_unuse_mm(snap->snap[i].mm); - - mmput(snap->snap[i].mm); - snap->snap[i].mm = NULL; - } - - if (err) { - kvfree(snap->snap[i].data); - snap->snap[i].data = ERR_PTR(err); - } - -cleanup_bo: - xe_bo_put(bo); - snap->snap[i].bo = NULL; - } -} - -void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) -{ - unsigned long i, j; - - if (IS_ERR_OR_NULL(snap)) { - drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); - return; - } - - for (i = 0; i < snap->num_snaps; i++) { - drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); - - if (IS_ERR(snap->snap[i].data)) { - drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, - PTR_ERR(snap->snap[i].data)); - continue; - } - - drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); - - for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { - u32 *val = snap->snap[i].data + j; - char dumped[ASCII85_BUFSZ]; - - drm_puts(p, ascii85_encode(*val, dumped)); - } - - drm_puts(p, "\n"); - } -} - -void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) -{ - unsigned long i; - - if (IS_ERR_OR_NULL(snap)) - return; - - for (i = 0; i < snap->num_snaps; i++) { - if (!IS_ERR(snap->snap[i].data)) - kvfree(snap->snap[i].data); - xe_bo_put(snap->snap[i].bo); - if (snap->snap[i].mm) - mmput(snap->snap[i].mm); - } - kvfree(snap); -} diff --git a/rr-cache/9b13d9d717b2c7afda9a1a29c7ee25f3085193b3/preimage b/rr-cache/9b13d9d717b2c7afda9a1a29c7ee25f3085193b3/preimage deleted file mode 100644 index 0024e2a821a8..000000000000 --- a/rr-cache/9b13d9d717b2c7afda9a1a29c7ee25f3085193b3/preimage +++ /dev/null @@ -1,3363 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2021 Intel Corporation - */ - -#include "xe_vm.h" - -#include <linux/dma-fence-array.h> -#include <linux/nospec.h> - -#include <drm/drm_exec.h> -#include <drm/drm_print.h> -#include <drm/ttm/ttm_execbuf_util.h> -#include <drm/ttm/ttm_tt.h> -#include <drm/xe_drm.h> -#include <linux/ascii85.h> -#include <linux/delay.h> -#include <linux/kthread.h> -#include <linux/mm.h> -#include <linux/swap.h> - -#include <generated/xe_wa_oob.h> - -#include "regs/xe_gtt_defs.h" -#include "xe_assert.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_exec_queue.h" -#include "xe_gt_pagefault.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pat.h" -#include "xe_pm.h" -#include "xe_preempt_fence.h" -#include "xe_pt.h" -#include "xe_res_cursor.h" -#include "xe_sync.h" -#include "xe_trace_bo.h" -#include "xe_wa.h" -#include "xe_hmm.h" - -static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) -{ - return vm->gpuvm.r_obj; -} - -/** - * xe_vma_userptr_check_repin() - Advisory check for repin needed - * @uvma: The userptr vma - * - * Check if the userptr vma has been invalidated since last successful - * repin. The check is advisory only and can the function can be called - * without the vm->userptr.notifier_lock held. There is no guarantee that the - * vma userptr will remain valid after a lockless check, so typically - * the call needs to be followed by a proper check under the notifier_lock. - * - * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended. - */ -int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) -{ - return mmu_interval_check_retry(&uvma->userptr.notifier, - uvma->userptr.notifier_seq) ? - -EAGAIN : 0; -} - -int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) -{ - struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_device *xe = vm->xe; - - lockdep_assert_held(&vm->lock); - xe_assert(xe, xe_vma_is_userptr(vma)); - - return xe_hmm_userptr_populate_range(uvma, false); -} - -static bool preempt_fences_waiting(struct xe_vm *vm) -{ - struct xe_exec_queue *q; - - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { - if (!q->lr.pfence || - test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &q->lr.pfence->flags)) { - return true; - } - } - - return false; -} - -static void free_preempt_fences(struct list_head *list) -{ - struct list_head *link, *next; - - list_for_each_safe(link, next, list) - xe_preempt_fence_free(to_preempt_fence_from_link(link)); -} - -static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, - unsigned int *count) -{ - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - - if (*count >= vm->preempt.num_exec_queues) - return 0; - - for (; *count < vm->preempt.num_exec_queues; ++(*count)) { - struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); - - if (IS_ERR(pfence)) - return PTR_ERR(pfence); - - list_move_tail(xe_preempt_fence_link(pfence), list); - } - - return 0; -} - -static int wait_for_existing_preempt_fences(struct xe_vm *vm) -{ - struct xe_exec_queue *q; - - xe_vm_assert_held(vm); - - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { - if (q->lr.pfence) { - long timeout = dma_fence_wait(q->lr.pfence, false); - - if (timeout < 0) - return -ETIME; - dma_fence_put(q->lr.pfence); - q->lr.pfence = NULL; - } - } - - return 0; -} - -static bool xe_vm_is_idle(struct xe_vm *vm) -{ - struct xe_exec_queue *q; - - xe_vm_assert_held(vm); - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { - if (!xe_exec_queue_is_idle(q)) - return false; - } - - return true; -} - -static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) -{ - struct list_head *link; - struct xe_exec_queue *q; - - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { - struct dma_fence *fence; - - link = list->next; - xe_assert(vm->xe, link != list); - - fence = xe_preempt_fence_arm(to_preempt_fence_from_link(link), - q, q->lr.context, - ++q->lr.seqno); - dma_fence_put(q->lr.pfence); - q->lr.pfence = fence; - } -} - -static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) -{ - struct xe_exec_queue *q; - int err; - - xe_bo_assert_held(bo); - - if (!vm->preempt.num_exec_queues) - return 0; - - err = dma_resv_reserve_fences(bo->ttm.base.resv, vm->preempt.num_exec_queues); - if (err) - return err; - - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) - if (q->lr.pfence) { - dma_resv_add_fence(bo->ttm.base.resv, - q->lr.pfence, - DMA_RESV_USAGE_BOOKKEEP); - } - - return 0; -} - -static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, - struct drm_exec *exec) -{ - struct xe_exec_queue *q; - - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { - q->ops->resume(q); - - drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, q->lr.pfence, - DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); - } -} - -int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) -{ - struct drm_gpuvm_exec vm_exec = { - .vm = &vm->gpuvm, - .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, - .num_fences = 1, - }; - struct drm_exec *exec = &vm_exec.exec; - struct dma_fence *pfence; - int err; - bool wait; - - xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); - - down_write(&vm->lock); - err = drm_gpuvm_exec_lock(&vm_exec); - if (err) - goto out_up_write; - - pfence = xe_preempt_fence_create(q, q->lr.context, - ++q->lr.seqno); - if (!pfence) { - err = -ENOMEM; - goto out_fini; - } - - list_add(&q->lr.link, &vm->preempt.exec_queues); - ++vm->preempt.num_exec_queues; - q->lr.pfence = pfence; - - down_read(&vm->userptr.notifier_lock); - - drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence, - DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); - - /* - * Check to see if a preemption on VM is in flight or userptr - * invalidation, if so trigger this preempt fence to sync state with - * other preempt fences on the VM. - */ - wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); - if (wait) - dma_fence_enable_sw_signaling(pfence); - - up_read(&vm->userptr.notifier_lock); - -out_fini: - drm_exec_fini(exec); -out_up_write: - up_write(&vm->lock); - - return err; -} - -/** - * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM - * @vm: The VM. - * @q: The exec_queue - */ -void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) -{ - if (!xe_vm_in_preempt_fence_mode(vm)) - return; - - down_write(&vm->lock); - list_del(&q->lr.link); - --vm->preempt.num_exec_queues; - if (q->lr.pfence) { - dma_fence_enable_sw_signaling(q->lr.pfence); - dma_fence_put(q->lr.pfence); - q->lr.pfence = NULL; - } - up_write(&vm->lock); -} - -/** - * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs - * that need repinning. - * @vm: The VM. - * - * This function checks for whether the VM has userptrs that need repinning, - * and provides a release-type barrier on the userptr.notifier_lock after - * checking. - * - * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are. - */ -int __xe_vm_userptr_needs_repin(struct xe_vm *vm) -{ - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - return (list_empty(&vm->userptr.repin_list) && - list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN; -} - -#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 - -<<<<<<< -/** - * xe_vm_kill() - VM Kill - * @vm: The VM. - * @unlocked: Flag indicates the VM's dma-resv is not held - * - * Kill the VM by setting banned flag indicated VM is no longer available for - * use. If in preempt fence mode, also kill all exec queue attached to the VM. - */ -void xe_vm_kill(struct xe_vm *vm, bool unlocked) -======= -static void xe_vm_kill(struct xe_vm *vm, bool unlocked) ->>>>>>> -{ - struct xe_exec_queue *q; - - lockdep_assert_held(&vm->lock); - - if (unlocked) - xe_vm_lock(vm, false); - - vm->flags |= XE_VM_FLAG_BANNED; - trace_xe_vm_kill(vm); - - list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) - q->ops->kill(q); - - if (unlocked) - xe_vm_unlock(vm); - - /* TODO: Inform user the VM is banned */ -} - -/** - * xe_vm_validate_should_retry() - Whether to retry after a validate error. - * @exec: The drm_exec object used for locking before validation. - * @err: The error returned from ttm_bo_validate(). - * @end: A ktime_t cookie that should be set to 0 before first use and - * that should be reused on subsequent calls. - * - * With multiple active VMs, under memory pressure, it is possible that - * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM. - * Until ttm properly handles locking in such scenarios, best thing the - * driver can do is retry with a timeout. Check if that is necessary, and - * if so unlock the drm_exec's objects while keeping the ticket to prepare - * for a rerun. - * - * Return: true if a retry after drm_exec_init() is recommended; - * false otherwise. - */ -bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end) -{ - ktime_t cur; - - if (err != -ENOMEM) - return false; - - cur = ktime_get(); - *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS); - if (!ktime_before(cur, *end)) - return false; - - msleep(20); - return true; -} - -static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) -{ - struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm); - struct drm_gpuva *gpuva; - int ret; - - lockdep_assert_held(&vm->lock); - drm_gpuvm_bo_for_each_va(gpuva, vm_bo) - list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind, - &vm->rebind_list); - - ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false); - if (ret) - return ret; - - vm_bo->evicted = false; - return 0; -} - -/** - * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas - * @vm: The vm for which we are rebinding. - * @exec: The struct drm_exec with the locked GEM objects. - * @num_fences: The number of fences to reserve for the operation, not - * including rebinds and validations. - * - * Validates all evicted gem objects and rebinds their vmas. Note that - * rebindings may cause evictions and hence the validation-rebind - * sequence is rerun until there are no more objects to validate. - * - * Return: 0 on success, negative error code on error. In particular, - * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if - * the drm_exec transaction needs to be restarted. - */ -int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, - unsigned int num_fences) -{ - struct drm_gem_object *obj; - unsigned long index; - int ret; - - do { - ret = drm_gpuvm_validate(&vm->gpuvm, exec); - if (ret) - return ret; - - ret = xe_vm_rebind(vm, false); - if (ret) - return ret; - } while (!list_empty(&vm->gpuvm.evict.list)); - - drm_exec_for_each_locked_object(exec, index, obj) { - ret = dma_resv_reserve_fences(obj->resv, num_fences); - if (ret) - return ret; - } - - return 0; -} - -static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, - bool *done) -{ - int err; - - err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0); - if (err) - return err; - - if (xe_vm_is_idle(vm)) { - vm->preempt.rebind_deactivated = true; - *done = true; - return 0; - } - - if (!preempt_fences_waiting(vm)) { - *done = true; - return 0; - } - - err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0); - if (err) - return err; - - err = wait_for_existing_preempt_fences(vm); - if (err) - return err; - - /* - * Add validation and rebinding to the locking loop since both can - * cause evictions which may require blocing dma_resv locks. - * The fence reservation here is intended for the new preempt fences - * we attach at the end of the rebind work. - */ - return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues); -} - -static void preempt_rebind_work_func(struct work_struct *w) -{ - struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); - struct drm_exec exec; - unsigned int fence_count = 0; - LIST_HEAD(preempt_fences); - ktime_t end = 0; - int err = 0; - long wait; - int __maybe_unused tries = 0; - - xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); - trace_xe_vm_rebind_worker_enter(vm); - - down_write(&vm->lock); - - if (xe_vm_is_closed_or_banned(vm)) { - up_write(&vm->lock); - trace_xe_vm_rebind_worker_exit(vm); - return; - } - -retry: - if (xe_vm_userptr_check_repin(vm)) { - err = xe_vm_userptr_pin(vm); - if (err) - goto out_unlock_outer; - } - - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); - - drm_exec_until_all_locked(&exec) { - bool done = false; - - err = xe_preempt_work_begin(&exec, vm, &done); - drm_exec_retry_on_contention(&exec); - if (err || done) { - drm_exec_fini(&exec); - if (err && xe_vm_validate_should_retry(&exec, err, &end)) - err = -EAGAIN; - - goto out_unlock_outer; - } - } - - err = alloc_preempt_fences(vm, &preempt_fences, &fence_count); - if (err) - goto out_unlock; - - err = xe_vm_rebind(vm, true); - if (err) - goto out_unlock; - - /* Wait on rebinds and munmap style VM unbinds */ - wait = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_KERNEL, - false, MAX_SCHEDULE_TIMEOUT); - if (wait <= 0) { - err = -ETIME; - goto out_unlock; - } - -#define retry_required(__tries, __vm) \ - (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ - (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ - __xe_vm_userptr_needs_repin(__vm)) - - down_read(&vm->userptr.notifier_lock); - if (retry_required(tries, vm)) { - up_read(&vm->userptr.notifier_lock); - err = -EAGAIN; - goto out_unlock; - } - -#undef retry_required - - spin_lock(&vm->xe->ttm.lru_lock); - ttm_lru_bulk_move_tail(&vm->lru_bulk_move); - spin_unlock(&vm->xe->ttm.lru_lock); - - /* Point of no return. */ - arm_preempt_fences(vm, &preempt_fences); - resume_and_reinstall_preempt_fences(vm, &exec); - up_read(&vm->userptr.notifier_lock); - -out_unlock: - drm_exec_fini(&exec); -out_unlock_outer: - if (err == -EAGAIN) { - trace_xe_vm_rebind_worker_retry(vm); - goto retry; - } - - if (err) { - drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); - xe_vm_kill(vm, true); - } - up_write(&vm->lock); - - free_preempt_fences(&preempt_fences); - - trace_xe_vm_rebind_worker_exit(vm); -} - -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, - const struct mmu_notifier_range *range, - unsigned long cur_seq) -{ - struct xe_userptr *userptr = container_of(mni, typeof(*userptr), notifier); - struct xe_userptr_vma *uvma = container_of(userptr, typeof(*uvma), userptr); - struct xe_vma *vma = &uvma->vma; - struct xe_vm *vm = xe_vma_vm(vma); - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - xe_assert(vm->xe, xe_vma_is_userptr(vma)); - trace_xe_vma_userptr_invalidate(vma); - - if (!mmu_notifier_range_blockable(range)) - return false; - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "NOTIFIER: addr=0x%016llx, range=0x%016llx", - xe_vma_start(vma), xe_vma_size(vma)); - - down_write(&vm->userptr.notifier_lock); - mmu_interval_set_seq(mni, cur_seq); - - /* No need to stop gpu access if the userptr is not yet bound. */ - if (!userptr->initial_bind) { - up_write(&vm->userptr.notifier_lock); - return true; - } - - /* - * Tell exec and rebind worker they need to repin and rebind this - * userptr. - */ - if (!xe_vm_in_fault_mode(vm) && - !(vma->gpuva.flags & XE_VMA_DESTROYED) && vma->tile_present) { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&userptr->invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - } - - up_write(&vm->userptr.notifier_lock); - - /* - * Preempt fences turn into schedule disables, pipeline these. - * Note that even in fault mode, we need to wait for binds and - * unbinds to complete, and those are attached as BOOKMARK fences - * to the vm. - */ - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - - if (xe_vm_in_fault_mode(vm)) { - err = xe_vm_invalidate_vma(vma); - XE_WARN_ON(err); - } - - trace_xe_vma_userptr_invalidate_complete(vma); - - return true; -} - -static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { - .invalidate = vma_userptr_invalidate, -}; - -int xe_vm_userptr_pin(struct xe_vm *vm) -{ - struct xe_userptr_vma *uvma, *next; - int err = 0; - LIST_HEAD(tmp_evict); - - xe_assert(vm->xe, !xe_vm_in_fault_mode(vm)); - lockdep_assert_held_write(&vm->lock); - - /* Collect invalidated userptrs */ - spin_lock(&vm->userptr.invalidated_lock); - list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, - userptr.invalidate_link) { - list_del_init(&uvma->userptr.invalidate_link); - list_move_tail(&uvma->userptr.repin_link, - &vm->userptr.repin_list); - } - spin_unlock(&vm->userptr.invalidated_lock); - - /* Pin and move to temporary list */ - list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, - userptr.repin_link) { - err = xe_vma_userptr_pin_pages(uvma); - if (err == -EFAULT) { - list_del_init(&uvma->userptr.repin_link); - - /* Wait for pending binds */ - xe_vm_lock(vm, false); - dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - - err = xe_vm_invalidate_vma(&uvma->vma); - xe_vm_unlock(vm); - if (err) - return err; - } else { - if (err < 0) - return err; - - list_del_init(&uvma->userptr.repin_link); - list_move_tail(&uvma->vma.combined_links.rebind, - &vm->rebind_list); - } - } - - return 0; -} - -/** - * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs - * that need repinning. - * @vm: The VM. - * - * This function does an advisory check for whether the VM has userptrs that - * need repinning. - * - * Return: 0 if there are no indications of userptrs needing repinning, - * -EAGAIN if there are. - */ -int xe_vm_userptr_check_repin(struct xe_vm *vm) -{ - return (list_empty_careful(&vm->userptr.repin_list) && - list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN; -} - -static int xe_vma_ops_alloc(struct xe_vma_ops *vops) -{ - int i; - - for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { - if (!vops->pt_update_ops[i].num_ops) - continue; - - vops->pt_update_ops[i].ops = - kmalloc_array(vops->pt_update_ops[i].num_ops, - sizeof(*vops->pt_update_ops[i].ops), - GFP_KERNEL); - if (!vops->pt_update_ops[i].ops) - return -ENOMEM; - } - - return 0; -} - -static void xe_vma_ops_fini(struct xe_vma_ops *vops) -{ - int i; - - for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) - kfree(vops->pt_update_ops[i].ops); -} - -static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask) -{ - int i; - - for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) - if (BIT(i) & tile_mask) - ++vops->pt_update_ops[i].num_ops; -} - -static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, - u8 tile_mask) -{ - INIT_LIST_HEAD(&op->link); - op->tile_mask = tile_mask; - op->base.op = DRM_GPUVA_OP_MAP; - op->base.map.va.addr = vma->gpuva.va.addr; - op->base.map.va.range = vma->gpuva.va.range; - op->base.map.gem.obj = vma->gpuva.gem.obj; - op->base.map.gem.offset = vma->gpuva.gem.offset; - op->map.vma = vma; - op->map.immediate = true; - op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; - op->map.is_null = xe_vma_is_null(vma); -} - -static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, - u8 tile_mask) -{ - struct xe_vma_op *op; - - op = kzalloc(sizeof(*op), GFP_KERNEL); - if (!op) - return -ENOMEM; - - xe_vm_populate_rebind(op, vma, tile_mask); - list_add_tail(&op->link, &vops->list); - xe_vma_ops_incr_pt_update_ops(vops, tile_mask); - - return 0; -} - -static struct dma_fence *ops_execute(struct xe_vm *vm, - struct xe_vma_ops *vops); -static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, - struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs); - -int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) -{ - struct dma_fence *fence; - struct xe_vma *vma, *next; - struct xe_vma_ops vops; - struct xe_vma_op *op, *next_op; - int err, i; - - lockdep_assert_held(&vm->lock); - if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || - list_empty(&vm->rebind_list)) - return 0; - - xe_vma_ops_init(&vops, vm, NULL, NULL, 0); - for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) - vops.pt_update_ops[i].wait_vm_bookkeep = true; - - xe_vm_assert_held(vm); - list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { - xe_assert(vm->xe, vma->tile_present); - - if (rebind_worker) - trace_xe_vma_rebind_worker(vma); - else - trace_xe_vma_rebind_exec(vma); - - err = xe_vm_ops_add_rebind(&vops, vma, - vma->tile_present); - if (err) - goto free_ops; - } - - err = xe_vma_ops_alloc(&vops); - if (err) - goto free_ops; - - fence = ops_execute(vm, &vops); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - } else { - dma_fence_put(fence); - list_for_each_entry_safe(vma, next, &vm->rebind_list, - combined_links.rebind) - list_del_init(&vma->combined_links.rebind); - } -free_ops: - list_for_each_entry_safe(op, next_op, &vops.list, link) { - list_del(&op->link); - kfree(op); - } - xe_vma_ops_fini(&vops); - - return err; -} - -struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) -{ - struct dma_fence *fence = NULL; - struct xe_vma_ops vops; - struct xe_vma_op *op, *next_op; - struct xe_tile *tile; - u8 id; - int err; - - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); - - xe_vma_ops_init(&vops, vm, NULL, NULL, 0); - for_each_tile(tile, vm->xe, id) { - vops.pt_update_ops[id].wait_vm_bookkeep = true; - vops.pt_update_ops[tile->id].q = - xe_tile_migrate_exec_queue(tile); - } - - err = xe_vm_ops_add_rebind(&vops, vma, tile_mask); - if (err) - return ERR_PTR(err); - - err = xe_vma_ops_alloc(&vops); - if (err) { - fence = ERR_PTR(err); - goto free_ops; - } - - fence = ops_execute(vm, &vops); - -free_ops: - list_for_each_entry_safe(op, next_op, &vops.list, link) { - list_del(&op->link); - kfree(op); - } - xe_vma_ops_fini(&vops); - - return fence; -} - -static void xe_vma_free(struct xe_vma *vma) -{ - if (xe_vma_is_userptr(vma)) - kfree(to_userptr_vma(vma)); - else - kfree(vma); -} - -#define VMA_CREATE_FLAG_READ_ONLY BIT(0) -#define VMA_CREATE_FLAG_IS_NULL BIT(1) -#define VMA_CREATE_FLAG_DUMPABLE BIT(2) - -static struct xe_vma *xe_vma_create(struct xe_vm *vm, - struct xe_bo *bo, - u64 bo_offset_or_userptr, - u64 start, u64 end, - u16 pat_index, unsigned int flags) -{ - struct xe_vma *vma; - struct xe_tile *tile; - u8 id; - bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); - bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); - bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); - - xe_assert(vm->xe, start < end); - xe_assert(vm->xe, end < vm->size); - - /* - * Allocate and ensure that the xe_vma_is_userptr() return - * matches what was allocated. - */ - if (!bo && !is_null) { - struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); - - if (!uvma) - return ERR_PTR(-ENOMEM); - - vma = &uvma->vma; - } else { - vma = kzalloc(sizeof(*vma), GFP_KERNEL); - if (!vma) - return ERR_PTR(-ENOMEM); - - if (is_null) - vma->gpuva.flags |= DRM_GPUVA_SPARSE; - if (bo) - vma->gpuva.gem.obj = &bo->ttm.base; - } - - INIT_LIST_HEAD(&vma->combined_links.rebind); - - INIT_LIST_HEAD(&vma->gpuva.gem.entry); - vma->gpuva.vm = &vm->gpuvm; - vma->gpuva.va.addr = start; - vma->gpuva.va.range = end - start + 1; - if (read_only) - vma->gpuva.flags |= XE_VMA_READ_ONLY; - if (dumpable) - vma->gpuva.flags |= XE_VMA_DUMPABLE; - - for_each_tile(tile, vm->xe, id) - vma->tile_mask |= 0x1 << id; - - if (vm->xe->info.has_atomic_enable_pte_bit) - vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; - - vma->pat_index = pat_index; - - if (bo) { - struct drm_gpuvm_bo *vm_bo; - - xe_bo_assert_held(bo); - - vm_bo = drm_gpuvm_bo_obtain(vma->gpuva.vm, &bo->ttm.base); - if (IS_ERR(vm_bo)) { - xe_vma_free(vma); - return ERR_CAST(vm_bo); - } - - drm_gpuvm_bo_extobj_add(vm_bo); - drm_gem_object_get(&bo->ttm.base); - vma->gpuva.gem.offset = bo_offset_or_userptr; - drm_gpuva_link(&vma->gpuva, vm_bo); - drm_gpuvm_bo_put(vm_bo); - } else /* userptr or null */ { - if (!is_null) { - struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr; - u64 size = end - start + 1; - int err; - - INIT_LIST_HEAD(&userptr->invalidate_link); - INIT_LIST_HEAD(&userptr->repin_link); - vma->gpuva.gem.offset = bo_offset_or_userptr; - - err = mmu_interval_notifier_insert(&userptr->notifier, - current->mm, - xe_vma_userptr(vma), size, - &vma_userptr_notifier_ops); - if (err) { - xe_vma_free(vma); - return ERR_PTR(err); - } - - userptr->notifier_seq = LONG_MAX; - } - - xe_vm_get(vm); - } - - return vma; -} - -static void xe_vma_destroy_late(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - if (vma->ufence) { - xe_sync_ufence_put(vma->ufence); - vma->ufence = NULL; - } - - if (xe_vma_is_userptr(vma)) { - struct xe_userptr_vma *uvma = to_userptr_vma(vma); - struct xe_userptr *userptr = &uvma->userptr; - - if (userptr->sg) - xe_hmm_userptr_free_sg(uvma); - - /* - * Since userptr pages are not pinned, we can't remove - * the notifer until we're sure the GPU is not accessing - * them anymore - */ - mmu_interval_notifier_remove(&userptr->notifier); - xe_vm_put(vm); - } else if (xe_vma_is_null(vma)) { - xe_vm_put(vm); - } else { - xe_bo_put(xe_vma_bo(vma)); - } - - xe_vma_free(vma); -} - -static void vma_destroy_work_func(struct work_struct *w) -{ - struct xe_vma *vma = - container_of(w, struct xe_vma, destroy_work); - - xe_vma_destroy_late(vma); -} - -static void vma_destroy_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); - - INIT_WORK(&vma->destroy_work, vma_destroy_work_func); - queue_work(system_unbound_wq, &vma->destroy_work); -} - -static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held_write(&vm->lock); - xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); - - if (xe_vma_is_userptr(vma)) { - xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); - - spin_lock(&vm->userptr.invalidated_lock); - list_del(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } else if (!xe_vma_is_null(vma)) { - xe_bo_assert_held(xe_vma_bo(vma)); - - drm_gpuva_unlink(&vma->gpuva); - } - - xe_vm_assert_held(vm); - if (fence) { - int ret = dma_fence_add_callback(fence, &vma->destroy_cb, - vma_destroy_cb); - - if (ret) { - XE_WARN_ON(ret != -ENOENT); - xe_vma_destroy_late(vma); - } - } else { - xe_vma_destroy_late(vma); - } -} - -/** - * xe_vm_lock_vma() - drm_exec utility to lock a vma - * @exec: The drm_exec object we're currently locking for. - * @vma: The vma for witch we want to lock the vm resv and any attached - * object's resv. - * - * Return: 0 on success, negative error code on error. In particular - * may return -EDEADLK on WW transaction contention and -EINTR if - * an interruptible wait is terminated by a signal. - */ -int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - struct xe_bo *bo = xe_vma_bo(vma); - int err; - - XE_WARN_ON(!vm); - - err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); - if (!err && bo && !bo->vm) - err = drm_exec_lock_obj(exec, &bo->ttm.base); - - return err; -} - -static void xe_vma_destroy_unlocked(struct xe_vma *vma) -{ - struct drm_exec exec; - int err; - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - err = xe_vm_lock_vma(&exec, vma); - drm_exec_retry_on_contention(&exec); - if (XE_WARN_ON(err)) - break; - } - - xe_vma_destroy(vma, NULL); - - drm_exec_fini(&exec); -} - -struct xe_vma * -xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) -{ - struct drm_gpuva *gpuva; - - lockdep_assert_held(&vm->lock); - - if (xe_vm_is_closed_or_banned(vm)) - return NULL; - - xe_assert(vm->xe, start + range <= vm->size); - - gpuva = drm_gpuva_find_first(&vm->gpuvm, start, range); - - return gpuva ? gpuva_to_vma(gpuva) : NULL; -} - -static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) -{ - int err; - - xe_assert(vm->xe, xe_vma_vm(vma) == vm); - lockdep_assert_held(&vm->lock); - - mutex_lock(&vm->snap_mutex); - err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva); - mutex_unlock(&vm->snap_mutex); - XE_WARN_ON(err); /* Shouldn't be possible */ - - return err; -} - -static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) -{ - xe_assert(vm->xe, xe_vma_vm(vma) == vm); - lockdep_assert_held(&vm->lock); - - mutex_lock(&vm->snap_mutex); - drm_gpuva_remove(&vma->gpuva); - mutex_unlock(&vm->snap_mutex); - if (vm->usm.last_fault_vma == vma) - vm->usm.last_fault_vma = NULL; -} - -static struct drm_gpuva_op *xe_vm_op_alloc(void) -{ - struct xe_vma_op *op; - - op = kzalloc(sizeof(*op), GFP_KERNEL); - - if (unlikely(!op)) - return NULL; - - return &op->base; -} - -static void xe_vm_free(struct drm_gpuvm *gpuvm); - -static const struct drm_gpuvm_ops gpuvm_ops = { - .op_alloc = xe_vm_op_alloc, - .vm_bo_validate = xe_gpuvm_validate, - .vm_free = xe_vm_free, -}; - -static u64 pde_encode_pat_index(struct xe_device *xe, u16 pat_index) -{ - u64 pte = 0; - - if (pat_index & BIT(0)) - pte |= XE_PPGTT_PTE_PAT0; - - if (pat_index & BIT(1)) - pte |= XE_PPGTT_PTE_PAT1; - - return pte; -} - -static u64 pte_encode_pat_index(struct xe_device *xe, u16 pat_index, - u32 pt_level) -{ - u64 pte = 0; - - if (pat_index & BIT(0)) - pte |= XE_PPGTT_PTE_PAT0; - - if (pat_index & BIT(1)) - pte |= XE_PPGTT_PTE_PAT1; - - if (pat_index & BIT(2)) { - if (pt_level) - pte |= XE_PPGTT_PDE_PDPE_PAT2; - else - pte |= XE_PPGTT_PTE_PAT2; - } - - if (pat_index & BIT(3)) - pte |= XELPG_PPGTT_PTE_PAT3; - - if (pat_index & (BIT(4))) - pte |= XE2_PPGTT_PTE_PAT4; - - return pte; -} - -static u64 pte_encode_ps(u32 pt_level) -{ - XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); - - if (pt_level == 1) - return XE_PDE_PS_2M; - else if (pt_level == 2) - return XE_PDPE_PS_1G; - - return 0; -} - -static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset, - const u16 pat_index) -{ - struct xe_device *xe = xe_bo_device(bo); - u64 pde; - - pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); - pde |= XE_PAGE_PRESENT | XE_PAGE_RW; - pde |= pde_encode_pat_index(xe, pat_index); - - return pde; -} - -static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, - u16 pat_index, u32 pt_level) -{ - struct xe_device *xe = xe_bo_device(bo); - u64 pte; - - pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE); - pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); - pte |= pte_encode_ps(pt_level); - - if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) - pte |= XE_PPGTT_PTE_DM; - - return pte; -} - -static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, - u16 pat_index, u32 pt_level) -{ - struct xe_device *xe = xe_vma_vm(vma)->xe; - - pte |= XE_PAGE_PRESENT; - - if (likely(!xe_vma_read_only(vma))) - pte |= XE_PAGE_RW; - - pte |= pte_encode_pat_index(xe, pat_index, pt_level); - pte |= pte_encode_ps(pt_level); - - if (unlikely(xe_vma_is_null(vma))) - pte |= XE_PTE_NULL; - - return pte; -} - -static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, - u16 pat_index, - u32 pt_level, bool devmem, u64 flags) -{ - u64 pte; - - /* Avoid passing random bits directly as flags */ - xe_assert(xe, !(flags & ~XE_PTE_PS64)); - - pte = addr; - pte |= XE_PAGE_PRESENT | XE_PAGE_RW; - pte |= pte_encode_pat_index(xe, pat_index, pt_level); - pte |= pte_encode_ps(pt_level); - - if (devmem) - pte |= XE_PPGTT_PTE_DM; - - pte |= flags; - - return pte; -} - -static const struct xe_pt_ops xelp_pt_ops = { - .pte_encode_bo = xelp_pte_encode_bo, - .pte_encode_vma = xelp_pte_encode_vma, - .pte_encode_addr = xelp_pte_encode_addr, - .pde_encode_bo = xelp_pde_encode_bo, -}; - -static void vm_destroy_work_func(struct work_struct *w); - -/** - * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the - * given tile and vm. - * @xe: xe device. - * @tile: tile to set up for. - * @vm: vm to set up for. - * - * Sets up a pagetable tree with one page-table per level and a single - * leaf PTE. All pagetable entries point to the single page-table or, - * for MAX_HUGEPTE_LEVEL, a NULL huge PTE returning 0 on read and - * writes become NOPs. - * - * Return: 0 on success, negative error code on error. - */ -static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, - struct xe_vm *vm) -{ - u8 id = tile->id; - int i; - - for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { - vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i); - if (IS_ERR(vm->scratch_pt[id][i])) - return PTR_ERR(vm->scratch_pt[id][i]); - - xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]); - } - - return 0; -} - -static void xe_vm_free_scratch(struct xe_vm *vm) -{ - struct xe_tile *tile; - u8 id; - - if (!xe_vm_has_scratch(vm)) - return; - - for_each_tile(tile, vm->xe, id) { - u32 i; - - if (!vm->pt_root[id]) - continue; - - for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) - if (vm->scratch_pt[id][i]) - xe_pt_destroy(vm->scratch_pt[id][i], vm->flags, NULL); - } -} - -struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) -{ - struct drm_gem_object *vm_resv_obj; - struct xe_vm *vm; - int err, number_tiles = 0; - struct xe_tile *tile; - u8 id; - - vm = kzalloc(sizeof(*vm), GFP_KERNEL); - if (!vm) - return ERR_PTR(-ENOMEM); - - vm->xe = xe; - - vm->size = 1ull << xe->info.va_bits; - - vm->flags = flags; - - init_rwsem(&vm->lock); - mutex_init(&vm->snap_mutex); - - INIT_LIST_HEAD(&vm->rebind_list); - - INIT_LIST_HEAD(&vm->userptr.repin_list); - INIT_LIST_HEAD(&vm->userptr.invalidated); - init_rwsem(&vm->userptr.notifier_lock); - spin_lock_init(&vm->userptr.invalidated_lock); - - INIT_WORK(&vm->destroy_work, vm_destroy_work_func); - - INIT_LIST_HEAD(&vm->preempt.exec_queues); - vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ - - for_each_tile(tile, xe, id) - xe_range_fence_tree_init(&vm->rftree[id]); - - vm->pt_ops = &xelp_pt_ops; - - /* - * Long-running workloads are not protected by the scheduler references. - * By design, run_job for long-running workloads returns NULL and the - * scheduler drops all the references of it, hence protecting the VM - * for this case is necessary. - */ - if (flags & XE_VM_FLAG_LR_MODE) - xe_pm_runtime_get_noresume(xe); - - vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm); - if (!vm_resv_obj) { - err = -ENOMEM; - goto err_no_resv; - } - - drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm, - vm_resv_obj, 0, vm->size, 0, 0, &gpuvm_ops); - - drm_gem_object_put(vm_resv_obj); - - err = xe_vm_lock(vm, true); - if (err) - goto err_close; - - if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) - vm->flags |= XE_VM_FLAG_64K; - - for_each_tile(tile, xe, id) { - if (flags & XE_VM_FLAG_MIGRATION && - tile->id != XE_VM_FLAG_TILE_ID(flags)) - continue; - - vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level); - if (IS_ERR(vm->pt_root[id])) { - err = PTR_ERR(vm->pt_root[id]); - vm->pt_root[id] = NULL; - goto err_unlock_close; - } - } - - if (xe_vm_has_scratch(vm)) { - for_each_tile(tile, xe, id) { - if (!vm->pt_root[id]) - continue; - - err = xe_vm_create_scratch(xe, tile, vm); - if (err) - goto err_unlock_close; - } - vm->batch_invalidate_tlb = true; - } - - if (vm->flags & XE_VM_FLAG_LR_MODE) { - INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); - vm->batch_invalidate_tlb = false; - } - - /* Fill pt_root after allocating scratch tables */ - for_each_tile(tile, xe, id) { - if (!vm->pt_root[id]) - continue; - - xe_pt_populate_empty(tile, vm, vm->pt_root[id]); - } - xe_vm_unlock(vm); - - /* Kernel migration VM shouldn't have a circular loop.. */ - if (!(flags & XE_VM_FLAG_MIGRATION)) { - for_each_tile(tile, xe, id) { - struct xe_gt *gt = tile->primary_gt; - struct xe_vm *migrate_vm; - struct xe_exec_queue *q; - u32 create_flags = EXEC_QUEUE_FLAG_VM; - - if (!vm->pt_root[id]) - continue; - - migrate_vm = xe_migrate_get_vm(tile->migrate); - q = xe_exec_queue_create_class(xe, gt, migrate_vm, - XE_ENGINE_CLASS_COPY, - create_flags); - xe_vm_put(migrate_vm); - if (IS_ERR(q)) { - err = PTR_ERR(q); - goto err_close; - } - vm->q[id] = q; - number_tiles++; - } - } - - if (number_tiles > 1) - vm->composite_fence_ctx = dma_fence_context_alloc(1); - - mutex_lock(&xe->usm.lock); - if (flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode++; - else if (!(flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode++; - mutex_unlock(&xe->usm.lock); - - trace_xe_vm_create(vm); - - return vm; - -err_unlock_close: - xe_vm_unlock(vm); -err_close: - xe_vm_close_and_put(vm); - return ERR_PTR(err); - -err_no_resv: - mutex_destroy(&vm->snap_mutex); - for_each_tile(tile, xe, id) - xe_range_fence_tree_fini(&vm->rftree[id]); - kfree(vm); - if (flags & XE_VM_FLAG_LR_MODE) - xe_pm_runtime_put(xe); - return ERR_PTR(err); -} - -static void xe_vm_close(struct xe_vm *vm) -{ - down_write(&vm->lock); - vm->size = 0; - up_write(&vm->lock); -} - -void xe_vm_close_and_put(struct xe_vm *vm) -{ - LIST_HEAD(contested); - struct xe_device *xe = vm->xe; - struct xe_tile *tile; - struct xe_vma *vma, *next_vma; - struct drm_gpuva *gpuva, *next; - u8 id; - - xe_assert(xe, !vm->preempt.num_exec_queues); - - xe_vm_close(vm); - if (xe_vm_in_preempt_fence_mode(vm)) - flush_work(&vm->preempt.rebind_work); - - down_write(&vm->lock); - for_each_tile(tile, xe, id) { - if (vm->q[id]) - xe_exec_queue_last_fence_put(vm->q[id], vm); - } - up_write(&vm->lock); - - for_each_tile(tile, xe, id) { - if (vm->q[id]) { - xe_exec_queue_kill(vm->q[id]); - xe_exec_queue_put(vm->q[id]); - vm->q[id] = NULL; - } - } - - down_write(&vm->lock); - xe_vm_lock(vm, false); - drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { - vma = gpuva_to_vma(gpuva); - - if (xe_vma_has_no_bo(vma)) { - down_read(&vm->userptr.notifier_lock); - vma->gpuva.flags |= XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); - } - - xe_vm_remove_vma(vm, vma); - - /* easy case, remove from VMA? */ - if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { - list_del_init(&vma->combined_links.rebind); - xe_vma_destroy(vma, NULL); - continue; - } - - list_move_tail(&vma->combined_links.destroy, &contested); - vma->gpuva.flags |= XE_VMA_DESTROYED; - } - - /* - * All vm operations will add shared fences to resv. - * The only exception is eviction for a shared object, - * but even so, the unbind when evicted would still - * install a fence to resv. Hence it's safe to - * destroy the pagetables immediately. - */ - xe_vm_free_scratch(vm); - - for_each_tile(tile, xe, id) { - if (vm->pt_root[id]) { - xe_pt_destroy(vm->pt_root[id], vm->flags, NULL); - vm->pt_root[id] = NULL; - } - } - xe_vm_unlock(vm); - - /* - * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL - * Since we hold a refcount to the bo, we can remove and free - * the members safely without locking. - */ - list_for_each_entry_safe(vma, next_vma, &contested, - combined_links.destroy) { - list_del_init(&vma->combined_links.destroy); - xe_vma_destroy_unlocked(vma); - } - - up_write(&vm->lock); - - mutex_lock(&xe->usm.lock); - if (vm->flags & XE_VM_FLAG_FAULT_MODE) - xe->usm.num_vm_in_fault_mode--; - else if (!(vm->flags & XE_VM_FLAG_MIGRATION)) - xe->usm.num_vm_in_non_fault_mode--; - - if (vm->usm.asid) { - void *lookup; - - xe_assert(xe, xe->info.has_asid); - xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); - - lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid); - xe_assert(xe, lookup == vm); - } - mutex_unlock(&xe->usm.lock); - - for_each_tile(tile, xe, id) - xe_range_fence_tree_fini(&vm->rftree[id]); - - xe_vm_put(vm); -} - -static void vm_destroy_work_func(struct work_struct *w) -{ - struct xe_vm *vm = - container_of(w, struct xe_vm, destroy_work); - struct xe_device *xe = vm->xe; - struct xe_tile *tile; - u8 id; - - /* xe_vm_close_and_put was not called? */ - xe_assert(xe, !vm->size); - - if (xe_vm_in_preempt_fence_mode(vm)) - flush_work(&vm->preempt.rebind_work); - - mutex_destroy(&vm->snap_mutex); - - if (vm->flags & XE_VM_FLAG_LR_MODE) - xe_pm_runtime_put(xe); - - for_each_tile(tile, xe, id) - XE_WARN_ON(vm->pt_root[id]); - - trace_xe_vm_free(vm); - - if (vm->xef) - xe_file_put(vm->xef); - - kfree(vm); -} - -static void xe_vm_free(struct drm_gpuvm *gpuvm) -{ - struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); - - /* To destroy the VM we need to be able to sleep */ - queue_work(system_unbound_wq, &vm->destroy_work); -} - -struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) -{ - struct xe_vm *vm; - - mutex_lock(&xef->vm.lock); - vm = xa_load(&xef->vm.xa, id); - if (vm) - xe_vm_get(vm); - mutex_unlock(&xef->vm.lock); - - return vm; -} - -u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) -{ - return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0, - tile_to_xe(tile)->pat.idx[XE_CACHE_WB]); -} - -static struct xe_exec_queue * -to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) -{ - return q ? q : vm->q[0]; -} - -static struct xe_user_fence * -find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) -{ - unsigned int i; - - for (i = 0; i < num_syncs; i++) { - struct xe_sync_entry *e = &syncs[i]; - - if (xe_sync_is_ufence(e)) - return xe_sync_ufence_get(e); - } - - return NULL; -} - -#define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ - DRM_XE_VM_CREATE_FLAG_LR_MODE | \ - DRM_XE_VM_CREATE_FLAG_FAULT_MODE) - -int xe_vm_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_vm_create *args = data; - struct xe_tile *tile; - struct xe_vm *vm; - u32 id, asid; - int err; - u32 flags = 0; - - if (XE_IOCTL_DBG(xe, args->extensions)) - return -EINVAL; - - if (XE_WA(xe_root_mmio_gt(xe), 14016763929)) - args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; - - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && - !xe->info.has_usm)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && - args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && - args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && - xe_device_in_non_fault_mode(xe))) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) && - xe_device_in_fault_mode(xe))) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->extensions)) - return -EINVAL; - - if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) - flags |= XE_VM_FLAG_SCRATCH_PAGE; - if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) - flags |= XE_VM_FLAG_LR_MODE; - if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) - flags |= XE_VM_FLAG_FAULT_MODE; - - vm = xe_vm_create(xe, flags); - if (IS_ERR(vm)) - return PTR_ERR(vm); - - mutex_lock(&xef->vm.lock); - err = xa_alloc(&xef->vm.xa, &id, vm, xa_limit_32b, GFP_KERNEL); - mutex_unlock(&xef->vm.lock); - if (err) - goto err_close_and_put; - - if (xe->info.has_asid) { - mutex_lock(&xe->usm.lock); - err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm, - XA_LIMIT(1, XE_MAX_ASID - 1), - &xe->usm.next_asid, GFP_KERNEL); - mutex_unlock(&xe->usm.lock); - if (err < 0) - goto err_free_id; - - vm->usm.asid = asid; - } - - args->vm_id = id; - vm->xef = xe_file_get(xef); - - /* Record BO memory for VM pagetable created against client */ - for_each_tile(tile, xe, id) - if (vm->pt_root[id]) - xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo); - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) - /* Warning: Security issue - never enable by default */ - args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE); -#endif - - return 0; - -err_free_id: - mutex_lock(&xef->vm.lock); - xa_erase(&xef->vm.xa, id); - mutex_unlock(&xef->vm.lock); -err_close_and_put: - xe_vm_close_and_put(vm); - - return err; -} - -int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_vm_destroy *args = data; - struct xe_vm *vm; - int err = 0; - - if (XE_IOCTL_DBG(xe, args->pad) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - mutex_lock(&xef->vm.lock); - vm = xa_load(&xef->vm.xa, args->vm_id); - if (XE_IOCTL_DBG(xe, !vm)) - err = -ENOENT; - else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) - err = -EBUSY; - else - xa_erase(&xef->vm.xa, args->vm_id); - mutex_unlock(&xef->vm.lock); - - if (!err) - xe_vm_close_and_put(vm); - - return err; -} - -static const u32 region_to_mem_type[] = { - XE_PL_TT, - XE_PL_VRAM0, - XE_PL_VRAM1, -}; - -static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, - bool post_commit) -{ - down_read(&vm->userptr.notifier_lock); - vma->gpuva.flags |= XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); - if (post_commit) - xe_vm_remove_vma(vm, vma); -} - -#undef ULL -#define ULL unsigned long long - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) -{ - struct xe_vma *vma; - - switch (op->op) { - case DRM_GPUVA_OP_MAP: - vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx", - (ULL)op->map.va.addr, (ULL)op->map.va.range); - break; - case DRM_GPUVA_OP_REMAP: - vma = gpuva_to_vma(op->remap.unmap->va); - vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", - (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), - op->remap.unmap->keep ? 1 : 0); - if (op->remap.prev) - vm_dbg(&xe->drm, - "REMAP:PREV: addr=0x%016llx, range=0x%016llx", - (ULL)op->remap.prev->va.addr, - (ULL)op->remap.prev->va.range); - if (op->remap.next) - vm_dbg(&xe->drm, - "REMAP:NEXT: addr=0x%016llx, range=0x%016llx", - (ULL)op->remap.next->va.addr, - (ULL)op->remap.next->va.range); - break; - case DRM_GPUVA_OP_UNMAP: - vma = gpuva_to_vma(op->unmap.va); - vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d", - (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), - op->unmap.keep ? 1 : 0); - break; - case DRM_GPUVA_OP_PREFETCH: - vma = gpuva_to_vma(op->prefetch.va); - vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx", - (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); - break; - default: - drm_warn(&xe->drm, "NOT POSSIBLE"); - } -} -#else -static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) -{ -} -#endif - -/* - * Create operations list from IOCTL arguments, setup operations fields so parse - * and commit steps are decoupled from IOCTL arguments. This step can fail. - */ -static struct drm_gpuva_ops * -vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, - u64 bo_offset_or_userptr, u64 addr, u64 range, - u32 operation, u32 flags, - u32 prefetch_region, u16 pat_index) -{ - struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; - struct drm_gpuva_ops *ops; - struct drm_gpuva_op *__op; - struct drm_gpuvm_bo *vm_bo; - int err; - - lockdep_assert_held_write(&vm->lock); - - vm_dbg(&vm->xe->drm, - "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx", - operation, (ULL)addr, (ULL)range, - (ULL)bo_offset_or_userptr); - - switch (operation) { - case DRM_XE_VM_BIND_OP_MAP: - case DRM_XE_VM_BIND_OP_MAP_USERPTR: - ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range, - obj, bo_offset_or_userptr); - break; - case DRM_XE_VM_BIND_OP_UNMAP: - ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range); - break; - case DRM_XE_VM_BIND_OP_PREFETCH: - ops = drm_gpuvm_prefetch_ops_create(&vm->gpuvm, addr, range); - break; - case DRM_XE_VM_BIND_OP_UNMAP_ALL: - xe_assert(vm->xe, bo); - - err = xe_bo_lock(bo, true); - if (err) - return ERR_PTR(err); - - vm_bo = drm_gpuvm_bo_obtain(&vm->gpuvm, obj); - if (IS_ERR(vm_bo)) { - xe_bo_unlock(bo); - return ERR_CAST(vm_bo); - } - - ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); - drm_gpuvm_bo_put(vm_bo); - xe_bo_unlock(bo); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - ops = ERR_PTR(-EINVAL); - } - if (IS_ERR(ops)) - return ops; - - drm_gpuva_for_each_op(__op, ops) { - struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - - if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.immediate = - flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; - op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; - op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; - op->map.pat_index = pat_index; - } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { - op->prefetch.region = prefetch_region; - } - - print_op(vm->xe, __op); - } - - return ops; -} - -static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, - u16 pat_index, unsigned int flags) -{ - struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL; - struct drm_exec exec; - struct xe_vma *vma; - int err = 0; - - lockdep_assert_held_write(&vm->lock); - - if (bo) { - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); - drm_exec_until_all_locked(&exec) { - err = 0; - if (!bo->vm) { - err = drm_exec_lock_obj(&exec, xe_vm_obj(vm)); - drm_exec_retry_on_contention(&exec); - } - if (!err) { - err = drm_exec_lock_obj(&exec, &bo->ttm.base); - drm_exec_retry_on_contention(&exec); - } - if (err) { - drm_exec_fini(&exec); - return ERR_PTR(err); - } - } - } - vma = xe_vma_create(vm, bo, op->gem.offset, - op->va.addr, op->va.addr + - op->va.range - 1, pat_index, flags); - if (IS_ERR(vma)) - goto err_unlock; - - if (xe_vma_is_userptr(vma)) - err = xe_vma_userptr_pin_pages(to_userptr_vma(vma)); - else if (!xe_vma_has_no_bo(vma) && !bo->vm) - err = add_preempt_fences(vm, bo); - -err_unlock: - if (bo) - drm_exec_fini(&exec); - - if (err) { - prep_vma_destroy(vm, vma, false); - xe_vma_destroy_unlocked(vma); - vma = ERR_PTR(err); - } - - return vma; -} - -static u64 xe_vma_max_pte_size(struct xe_vma *vma) -{ - if (vma->gpuva.flags & XE_VMA_PTE_1G) - return SZ_1G; - else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) - return SZ_2M; - else if (vma->gpuva.flags & XE_VMA_PTE_64K) - return SZ_64K; - else if (vma->gpuva.flags & XE_VMA_PTE_4K) - return SZ_4K; - - return SZ_1G; /* Uninitialized, used max size */ -} - -static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) -{ - switch (size) { - case SZ_1G: - vma->gpuva.flags |= XE_VMA_PTE_1G; - break; - case SZ_2M: - vma->gpuva.flags |= XE_VMA_PTE_2M; - break; - case SZ_64K: - vma->gpuva.flags |= XE_VMA_PTE_64K; - break; - case SZ_4K: - vma->gpuva.flags |= XE_VMA_PTE_4K; - break; - } -} - -static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) -{ - int err = 0; - - lockdep_assert_held_write(&vm->lock); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - err |= xe_vm_insert_vma(vm, op->map.vma); - if (!err) - op->flags |= XE_VMA_OP_COMMITTED; - break; - case DRM_GPUVA_OP_REMAP: - { - u8 tile_present = - gpuva_to_vma(op->base.remap.unmap->va)->tile_present; - - prep_vma_destroy(vm, gpuva_to_vma(op->base.remap.unmap->va), - true); - op->flags |= XE_VMA_OP_COMMITTED; - - if (op->remap.prev) { - err |= xe_vm_insert_vma(vm, op->remap.prev); - if (!err) - op->flags |= XE_VMA_OP_PREV_COMMITTED; - if (!err && op->remap.skip_prev) { - op->remap.prev->tile_present = - tile_present; - op->remap.prev = NULL; - } - } - if (op->remap.next) { - err |= xe_vm_insert_vma(vm, op->remap.next); - if (!err) - op->flags |= XE_VMA_OP_NEXT_COMMITTED; - if (!err && op->remap.skip_next) { - op->remap.next->tile_present = - tile_present; - op->remap.next = NULL; - } - } - - /* Adjust for partial unbind after removin VMA from VM */ - if (!err) { - op->base.remap.unmap->va->va.addr = op->remap.start; - op->base.remap.unmap->va->va.range = op->remap.range; - } - break; - } - case DRM_GPUVA_OP_UNMAP: - prep_vma_destroy(vm, gpuva_to_vma(op->base.unmap.va), true); - op->flags |= XE_VMA_OP_COMMITTED; - break; - case DRM_GPUVA_OP_PREFETCH: - op->flags |= XE_VMA_OP_COMMITTED; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, - struct xe_vma_ops *vops) -{ - struct xe_device *xe = vm->xe; - struct drm_gpuva_op *__op; - struct xe_tile *tile; - u8 id, tile_mask = 0; - int err = 0; - - lockdep_assert_held_write(&vm->lock); - - for_each_tile(tile, vm->xe, id) - tile_mask |= 0x1 << id; - - drm_gpuva_for_each_op(__op, ops) { - struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - struct xe_vma *vma; - unsigned int flags = 0; - - INIT_LIST_HEAD(&op->link); - list_add_tail(&op->link, &vops->list); - op->tile_mask = tile_mask; - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - { - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->map.is_null ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->map.dumpable ? - VMA_CREATE_FLAG_DUMPABLE : 0; - - vma = new_vma(vm, &op->base.map, op->map.pat_index, - flags); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - op->map.vma = vma; - if (op->map.immediate || !xe_vm_in_fault_mode(vm)) - xe_vma_ops_incr_pt_update_ops(vops, - op->tile_mask); - break; - } - case DRM_GPUVA_OP_REMAP: - { - struct xe_vma *old = - gpuva_to_vma(op->base.remap.unmap->va); - - op->remap.start = xe_vma_start(old); - op->remap.range = xe_vma_size(old); - - if (op->base.remap.prev) { - flags |= op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->base.remap.unmap->va->flags & - XE_VMA_DUMPABLE ? - VMA_CREATE_FLAG_DUMPABLE : 0; - - vma = new_vma(vm, op->base.remap.prev, - old->pat_index, flags); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - op->remap.prev = vma; - - /* - * Userptr creates a new SG mapping so - * we must also rebind. - */ - op->remap.skip_prev = !xe_vma_is_userptr(old) && - IS_ALIGNED(xe_vma_end(vma), - xe_vma_max_pte_size(old)); - if (op->remap.skip_prev) { - xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); - op->remap.range -= - xe_vma_end(vma) - - xe_vma_start(old); - op->remap.start = xe_vma_end(vma); - vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx", - (ULL)op->remap.start, - (ULL)op->remap.range); - } else { - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); - } - } - - if (op->base.remap.next) { - flags |= op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->base.remap.unmap->va->flags & - XE_VMA_DUMPABLE ? - VMA_CREATE_FLAG_DUMPABLE : 0; - - vma = new_vma(vm, op->base.remap.next, - old->pat_index, flags); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - op->remap.next = vma; - - /* - * Userptr creates a new SG mapping so - * we must also rebind. - */ - op->remap.skip_next = !xe_vma_is_userptr(old) && - IS_ALIGNED(xe_vma_start(vma), - xe_vma_max_pte_size(old)); - if (op->remap.skip_next) { - xe_vma_set_pte_size(vma, xe_vma_max_pte_size(old)); - op->remap.range -= - xe_vma_end(old) - - xe_vma_start(vma); - vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx", - (ULL)op->remap.start, - (ULL)op->remap.range); - } else { - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); - } - } - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); - break; - } - case DRM_GPUVA_OP_UNMAP: - case DRM_GPUVA_OP_PREFETCH: - /* FIXME: Need to skip some prefetch ops */ - xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - err = xe_vma_op_commit(vm, op); - if (err) - return err; - } - - return 0; -} - -static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, - bool post_commit, bool prev_post_commit, - bool next_post_commit) -{ - lockdep_assert_held_write(&vm->lock); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (op->map.vma) { - prep_vma_destroy(vm, op->map.vma, post_commit); - xe_vma_destroy_unlocked(op->map.vma); - } - break; - case DRM_GPUVA_OP_UNMAP: - { - struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va); - - if (vma) { - down_read(&vm->userptr.notifier_lock); - vma->gpuva.flags &= ~XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); - if (post_commit) - xe_vm_insert_vma(vm, vma); - } - break; - } - case DRM_GPUVA_OP_REMAP: - { - struct xe_vma *vma = gpuva_to_vma(op->base.remap.unmap->va); - - if (op->remap.prev) { - prep_vma_destroy(vm, op->remap.prev, prev_post_commit); - xe_vma_destroy_unlocked(op->remap.prev); - } - if (op->remap.next) { - prep_vma_destroy(vm, op->remap.next, next_post_commit); - xe_vma_destroy_unlocked(op->remap.next); - } - if (vma) { - down_read(&vm->userptr.notifier_lock); - vma->gpuva.flags &= ~XE_VMA_DESTROYED; - up_read(&vm->userptr.notifier_lock); - if (post_commit) - xe_vm_insert_vma(vm, vma); - } - break; - } - case DRM_GPUVA_OP_PREFETCH: - /* Nothing to do */ - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, - struct drm_gpuva_ops **ops, - int num_ops_list) -{ - int i; - - for (i = num_ops_list - 1; i >= 0; --i) { - struct drm_gpuva_ops *__ops = ops[i]; - struct drm_gpuva_op *__op; - - if (!__ops) - continue; - - drm_gpuva_for_each_op_reverse(__op, __ops) { - struct xe_vma_op *op = gpuva_op_to_vma_op(__op); - - xe_vma_op_unwind(vm, op, - op->flags & XE_VMA_OP_COMMITTED, - op->flags & XE_VMA_OP_PREV_COMMITTED, - op->flags & XE_VMA_OP_NEXT_COMMITTED); - } - } -} - -static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, - bool validate) -{ - struct xe_bo *bo = xe_vma_bo(vma); - int err = 0; - - if (bo) { - if (!bo->vm) - err = drm_exec_lock_obj(exec, &bo->ttm.base); - if (!err && validate) - err = xe_bo_validate(bo, xe_vma_vm(vma), true); - } - - return err; -} - -static int check_ufence(struct xe_vma *vma) -{ - if (vma->ufence) { - struct xe_user_fence * const f = vma->ufence; - - if (!xe_sync_ufence_get_status(f)) - return -EBUSY; - - vma->ufence = NULL; - xe_sync_ufence_put(f); - } - - return 0; -} - -static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, - struct xe_vma_op *op) -{ - int err = 0; - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - err = vma_lock_and_validate(exec, op->map.vma, - !xe_vm_in_fault_mode(vm) || - op->map.immediate); - break; - case DRM_GPUVA_OP_REMAP: - err = check_ufence(gpuva_to_vma(op->base.remap.unmap->va)); - if (err) - break; - - err = vma_lock_and_validate(exec, - gpuva_to_vma(op->base.remap.unmap->va), - false); - if (!err && op->remap.prev) - err = vma_lock_and_validate(exec, op->remap.prev, true); - if (!err && op->remap.next) - err = vma_lock_and_validate(exec, op->remap.next, true); - break; - case DRM_GPUVA_OP_UNMAP: - err = check_ufence(gpuva_to_vma(op->base.unmap.va)); - if (err) - break; - - err = vma_lock_and_validate(exec, - gpuva_to_vma(op->base.unmap.va), - false); - break; - case DRM_GPUVA_OP_PREFETCH: - { - struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); - u32 region = op->prefetch.region; - - xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type)); - - err = vma_lock_and_validate(exec, - gpuva_to_vma(op->base.prefetch.va), - false); - if (!err && !xe_vma_has_no_bo(vma)) - err = xe_bo_migrate(xe_vma_bo(vma), - region_to_mem_type[region]); - break; - } - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, - struct xe_vm *vm, - struct xe_vma_ops *vops) -{ - struct xe_vma_op *op; - int err; - - err = drm_exec_lock_obj(exec, xe_vm_obj(vm)); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_lock_and_prep(exec, vm, op); - if (err) - return err; - } - - return 0; -} - -static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) -{ - struct xe_exec_queue *q = vops->q; - struct xe_tile *tile; - int number_tiles = 0; - u8 id; - - for_each_tile(tile, vm->xe, id) { - if (vops->pt_update_ops[id].num_ops) - ++number_tiles; - - if (vops->pt_update_ops[id].q) - continue; - - if (q) { - vops->pt_update_ops[id].q = q; - if (vm->pt_root[id] && !list_empty(&q->multi_gt_list)) - q = list_next_entry(q, multi_gt_list); - } else { - vops->pt_update_ops[id].q = vm->q[id]; - } - } - - return number_tiles; -} - -static struct dma_fence *ops_execute(struct xe_vm *vm, - struct xe_vma_ops *vops) -{ - struct xe_tile *tile; - struct dma_fence *fence = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; - int number_tiles = 0, current_fence = 0, err; - u8 id; - - number_tiles = vm_ops_setup_tile_args(vm, vops); - if (number_tiles == 0) - return ERR_PTR(-ENODATA); - - if (number_tiles > 1) { - fences = kmalloc_array(number_tiles, sizeof(*fences), - GFP_KERNEL); - if (!fences) - return ERR_PTR(-ENOMEM); - } - - for_each_tile(tile, vm->xe, id) { - if (!vops->pt_update_ops[id].num_ops) - continue; - - err = xe_pt_update_ops_prepare(tile, vops); - if (err) { - fence = ERR_PTR(err); - goto err_out; - } - } - - for_each_tile(tile, vm->xe, id) { - if (!vops->pt_update_ops[id].num_ops) - continue; - - fence = xe_pt_update_ops_run(tile, vops); - if (IS_ERR(fence)) - goto err_out; - - if (fences) - fences[current_fence++] = fence; - } - - if (fences) { - cf = dma_fence_array_create(number_tiles, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - if (!cf) { - --vm->composite_fence_seqno; - fence = ERR_PTR(-ENOMEM); - goto err_out; - } - fence = &cf->base; - } - - for_each_tile(tile, vm->xe, id) { - if (!vops->pt_update_ops[id].num_ops) - continue; - - xe_pt_update_ops_fini(tile, vops); - } - - return fence; - -err_out: - for_each_tile(tile, vm->xe, id) { - if (!vops->pt_update_ops[id].num_ops) - continue; - - xe_pt_update_ops_abort(tile, vops); - } - while (current_fence) - dma_fence_put(fences[--current_fence]); - kfree(fences); - kfree(cf); - - return fence; -} - -static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) -{ - if (vma->ufence) - xe_sync_ufence_put(vma->ufence); - vma->ufence = __xe_sync_ufence_get(ufence); -} - -static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_user_fence *ufence) -{ - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - vma_add_ufence(op->map.vma, ufence); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - vma_add_ufence(op->remap.prev, ufence); - if (op->remap.next) - vma_add_ufence(op->remap.next, ufence); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - vma_add_ufence(gpuva_to_vma(op->base.prefetch.va), ufence); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, - struct dma_fence *fence) -{ - struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q); - struct xe_user_fence *ufence; - struct xe_vma_op *op; - int i; - - ufence = find_ufence_get(vops->syncs, vops->num_syncs); - list_for_each_entry(op, &vops->list, link) { - if (ufence) - op_add_ufence(vm, op, ufence); - - if (op->base.op == DRM_GPUVA_OP_UNMAP) - xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), fence); - else if (op->base.op == DRM_GPUVA_OP_REMAP) - xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), - fence); - } - if (ufence) - xe_sync_ufence_put(ufence); - for (i = 0; i < vops->num_syncs; i++) - xe_sync_entry_signal(vops->syncs + i, fence); - xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence); - dma_fence_put(fence); -} - -static int vm_bind_ioctl_ops_execute(struct xe_vm *vm, - struct xe_vma_ops *vops) -{ - struct drm_exec exec; - struct dma_fence *fence; - int err; - - lockdep_assert_held_write(&vm->lock); - - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES, 0); - drm_exec_until_all_locked(&exec) { - err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops); - drm_exec_retry_on_contention(&exec); - if (err) - goto unlock; - - fence = ops_execute(vm, vops); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto unlock; - } - - vm_bind_ioctl_ops_fini(vm, vops, fence); - } - -unlock: - drm_exec_fini(&exec); - return err; -} - -#define SUPPORTED_FLAGS \ - (DRM_XE_VM_BIND_FLAG_READONLY | \ - DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ - DRM_XE_VM_BIND_FLAG_NULL | \ - DRM_XE_VM_BIND_FLAG_DUMPABLE) -#define XE_64K_PAGE_MASK 0xffffull -#define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) - -static int vm_bind_ioctl_check_args(struct xe_device *xe, - struct drm_xe_vm_bind *args, - struct drm_xe_vm_bind_op **bind_ops) -{ - int err; - int i; - - if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) - return -EINVAL; - - if (XE_IOCTL_DBG(xe, args->extensions)) - return -EINVAL; - - if (args->num_binds > 1) { - u64 __user *bind_user = - u64_to_user_ptr(args->vector_of_binds); - - *bind_ops = kvmalloc_array(args->num_binds, - sizeof(struct drm_xe_vm_bind_op), - GFP_KERNEL | __GFP_ACCOUNT); - if (!*bind_ops) - return -ENOMEM; - - err = __copy_from_user(*bind_ops, bind_user, - sizeof(struct drm_xe_vm_bind_op) * - args->num_binds); - if (XE_IOCTL_DBG(xe, err)) { - err = -EFAULT; - goto free_bind_ops; - } - } else { - *bind_ops = &args->bind; - } - - for (i = 0; i < args->num_binds; ++i) { - u64 range = (*bind_ops)[i].range; - u64 addr = (*bind_ops)[i].addr; - u32 op = (*bind_ops)[i].op; - u32 flags = (*bind_ops)[i].flags; - u32 obj = (*bind_ops)[i].obj; - u64 obj_offset = (*bind_ops)[i].obj_offset; - u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; - bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; - u16 pat_index = (*bind_ops)[i].pat_index; - u16 coh_mode; - - if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { - err = -EINVAL; - goto free_bind_ops; - } - - pat_index = array_index_nospec(pat_index, xe->pat.n_entries); - (*bind_ops)[i].pat_index = pat_index; - coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); - if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ - err = -EINVAL; - goto free_bind_ops; - } - - if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { - err = -EINVAL; - goto free_bind_ops; - } - - if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || - XE_IOCTL_DBG(xe, obj && is_null) || - XE_IOCTL_DBG(xe, obj_offset && is_null) || - XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && - is_null) || - XE_IOCTL_DBG(xe, !obj && - op == DRM_XE_VM_BIND_OP_MAP && - !is_null) || - XE_IOCTL_DBG(xe, !obj && - op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || - XE_IOCTL_DBG(xe, addr && - op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || - XE_IOCTL_DBG(xe, range && - op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || - XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || - XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && - op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || - XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_DBG(xe, prefetch_region && - op != DRM_XE_VM_BIND_OP_PREFETCH) || - XE_IOCTL_DBG(xe, !(BIT(prefetch_region) & - xe->info.mem_region_mask)) || - XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_UNMAP)) { - err = -EINVAL; - goto free_bind_ops; - } - - if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || - XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || - XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || - XE_IOCTL_DBG(xe, !range && - op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { - err = -EINVAL; - goto free_bind_ops; - } - } - - return 0; - -free_bind_ops: - if (args->num_binds > 1) - kvfree(*bind_ops); - return err; -} - -static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, - struct xe_exec_queue *q, - struct xe_sync_entry *syncs, - int num_syncs) -{ - struct dma_fence *fence; - int i, err = 0; - - fence = xe_sync_in_fence_get(syncs, num_syncs, - to_wait_exec_queue(vm, q), vm); - if (IS_ERR(fence)) - return PTR_ERR(fence); - - for (i = 0; i < num_syncs; i++) - xe_sync_entry_signal(&syncs[i], fence); - - xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm, - fence); - dma_fence_put(fence); - - return err; -} - -static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, - struct xe_exec_queue *q, - struct xe_sync_entry *syncs, u32 num_syncs) -{ - memset(vops, 0, sizeof(*vops)); - INIT_LIST_HEAD(&vops->list); - vops->vm = vm; - vops->q = q; - vops->syncs = syncs; - vops->num_syncs = num_syncs; -} - -static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, - u64 addr, u64 range, u64 obj_offset, - u16 pat_index) -{ - u16 coh_mode; - - if (XE_IOCTL_DBG(xe, range > bo->size) || - XE_IOCTL_DBG(xe, obj_offset > - bo->size - range)) { - return -EINVAL; - } - - if (bo->flags & XE_BO_FLAG_INTERNAL_64K) { - if (XE_IOCTL_DBG(xe, obj_offset & - XE_64K_PAGE_MASK) || - XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || - XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { - return -EINVAL; - } - } - - coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); - if (bo->cpu_caching) { - if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && - bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { - return -EINVAL; - } - } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { - /* - * Imported dma-buf from a different device should - * require 1way or 2way coherency since we don't know - * how it was mapped on the CPU. Just assume is it - * potentially cached on CPU side. - */ - return -EINVAL; - } - - return 0; -} - -int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) -{ - struct xe_device *xe = to_xe_device(dev); - struct xe_file *xef = to_xe_file(file); - struct drm_xe_vm_bind *args = data; - struct drm_xe_sync __user *syncs_user; - struct xe_bo **bos = NULL; - struct drm_gpuva_ops **ops = NULL; - struct xe_vm *vm; - struct xe_exec_queue *q = NULL; - u32 num_syncs, num_ufence = 0; - struct xe_sync_entry *syncs = NULL; - struct drm_xe_vm_bind_op *bind_ops; - struct xe_vma_ops vops; - int err; - int i; - - err = vm_bind_ioctl_check_args(xe, args, &bind_ops); - if (err) - return err; - - if (args->exec_queue_id) { - q = xe_exec_queue_lookup(xef, args->exec_queue_id); - if (XE_IOCTL_DBG(xe, !q)) { - err = -ENOENT; - goto free_objs; - } - - if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { - err = -EINVAL; - goto put_exec_queue; - } - } - - vm = xe_vm_lookup(xef, args->vm_id); - if (XE_IOCTL_DBG(xe, !vm)) { - err = -EINVAL; - goto put_exec_queue; - } - - err = down_write_killable(&vm->lock); - if (err) - goto put_vm; - - if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { - err = -ENOENT; - goto release_vm_lock; - } - - for (i = 0; i < args->num_binds; ++i) { - u64 range = bind_ops[i].range; - u64 addr = bind_ops[i].addr; - - if (XE_IOCTL_DBG(xe, range > vm->size) || - XE_IOCTL_DBG(xe, addr > vm->size - range)) { - err = -EINVAL; - goto release_vm_lock; - } - } - - if (args->num_binds) { - bos = kvcalloc(args->num_binds, sizeof(*bos), - GFP_KERNEL | __GFP_ACCOUNT); - if (!bos) { - err = -ENOMEM; - goto release_vm_lock; - } - - ops = kvcalloc(args->num_binds, sizeof(*ops), - GFP_KERNEL | __GFP_ACCOUNT); - if (!ops) { - err = -ENOMEM; - goto release_vm_lock; - } - } - - for (i = 0; i < args->num_binds; ++i) { - struct drm_gem_object *gem_obj; - u64 range = bind_ops[i].range; - u64 addr = bind_ops[i].addr; - u32 obj = bind_ops[i].obj; - u64 obj_offset = bind_ops[i].obj_offset; - u16 pat_index = bind_ops[i].pat_index; - - if (!obj) - continue; - - gem_obj = drm_gem_object_lookup(file, obj); - if (XE_IOCTL_DBG(xe, !gem_obj)) { - err = -ENOENT; - goto put_obj; - } - bos[i] = gem_to_xe_bo(gem_obj); - - err = xe_vm_bind_ioctl_validate_bo(xe, bos[i], addr, range, - obj_offset, pat_index); - if (err) - goto put_obj; - } - - if (args->num_syncs) { - syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); - if (!syncs) { - err = -ENOMEM; - goto put_obj; - } - } - - syncs_user = u64_to_user_ptr(args->syncs); - for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { - err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], - &syncs_user[num_syncs], - (xe_vm_in_lr_mode(vm) ? - SYNC_PARSE_FLAG_LR_MODE : 0) | - (!args->num_binds ? - SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); - if (err) - goto free_syncs; - - if (xe_sync_is_ufence(&syncs[num_syncs])) - num_ufence++; - } - - if (XE_IOCTL_DBG(xe, num_ufence > 1)) { - err = -EINVAL; - goto free_syncs; - } - - if (!args->num_binds) { - err = -ENODATA; - goto free_syncs; - } - - xe_vma_ops_init(&vops, vm, q, syncs, num_syncs); - for (i = 0; i < args->num_binds; ++i) { - u64 range = bind_ops[i].range; - u64 addr = bind_ops[i].addr; - u32 op = bind_ops[i].op; - u32 flags = bind_ops[i].flags; - u64 obj_offset = bind_ops[i].obj_offset; - u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; - u16 pat_index = bind_ops[i].pat_index; - - ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset, - addr, range, op, flags, - prefetch_region, pat_index); - if (IS_ERR(ops[i])) { - err = PTR_ERR(ops[i]); - ops[i] = NULL; - goto unwind_ops; - } - - err = vm_bind_ioctl_ops_parse(vm, ops[i], &vops); - if (err) - goto unwind_ops; - } - - /* Nothing to do */ - if (list_empty(&vops.list)) { - err = -ENODATA; - goto unwind_ops; - } - - err = xe_vma_ops_alloc(&vops); - if (err) - goto unwind_ops; - - err = vm_bind_ioctl_ops_execute(vm, &vops); - -unwind_ops: - if (err && err != -ENODATA) - vm_bind_ioctl_ops_unwind(vm, ops, args->num_binds); - xe_vma_ops_fini(&vops); - for (i = args->num_binds - 1; i >= 0; --i) - if (ops[i]) - drm_gpuva_ops_free(&vm->gpuvm, ops[i]); -free_syncs: - if (err == -ENODATA) - err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); - while (num_syncs--) - xe_sync_entry_cleanup(&syncs[num_syncs]); - - kfree(syncs); -put_obj: - for (i = 0; i < args->num_binds; ++i) - xe_bo_put(bos[i]); -release_vm_lock: - up_write(&vm->lock); -put_vm: - xe_vm_put(vm); -put_exec_queue: - if (q) - xe_exec_queue_put(q); -free_objs: - kvfree(bos); - kvfree(ops); - if (args->num_binds > 1) - kvfree(bind_ops); - return err; -} - -/** - * xe_vm_lock() - Lock the vm's dma_resv object - * @vm: The struct xe_vm whose lock is to be locked - * @intr: Whether to perform any wait interruptible - * - * Return: 0 on success, -EINTR if @intr is true and the wait for a - * contended lock was interrupted. If @intr is false, the function - * always returns 0. - */ -int xe_vm_lock(struct xe_vm *vm, bool intr) -{ - if (intr) - return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL); - - return dma_resv_lock(xe_vm_resv(vm), NULL); -} - -/** - * xe_vm_unlock() - Unlock the vm's dma_resv object - * @vm: The struct xe_vm whose lock is to be released. - * - * Unlock a buffer object lock that was locked by xe_vm_lock(). - */ -void xe_vm_unlock(struct xe_vm *vm) -{ - dma_resv_unlock(xe_vm_resv(vm)); -} - -/** - * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock - * @vma: VMA to invalidate - * - * Walks a list of page tables leaves which it memset the entries owned by this - * VMA to zero, invalidates the TLBs, and block until TLBs invalidation is - * complete. - * - * Returns 0 for success, negative error code otherwise. - */ -int xe_vm_invalidate_vma(struct xe_vma *vma) -{ - struct xe_device *xe = xe_vma_vm(vma)->xe; - struct xe_tile *tile; - struct xe_gt_tlb_invalidation_fence - fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; - u8 id; - u32 fence_id = 0; - int ret = 0; - - xe_assert(xe, !xe_vma_is_null(vma)); - trace_xe_vma_invalidate(vma); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "INVALIDATE: addr=0x%016llx, range=0x%016llx", - xe_vma_start(vma), xe_vma_size(vma)); - - /* Check that we don't race with page-table updates */ - if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { - if (xe_vma_is_userptr(vma)) { - WARN_ON_ONCE(!mmu_interval_check_retry - (&to_userptr_vma(vma)->userptr.notifier, - to_userptr_vma(vma)->userptr.notifier_seq)); - WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)), - DMA_RESV_USAGE_BOOKKEEP)); - - } else { - xe_bo_assert_held(xe_vma_bo(vma)); - } - } - - for_each_tile(tile, xe, id) { - if (xe_pt_zap_ptes(tile, vma)) { - xe_device_wmb(xe); - xe_gt_tlb_invalidation_fence_init(tile->primary_gt, - &fence[fence_id], - true); - - ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, - &fence[fence_id], vma); - if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); - goto wait; - } - ++fence_id; - - if (!tile->media_gt) - continue; - - xe_gt_tlb_invalidation_fence_init(tile->media_gt, - &fence[fence_id], - true); - - ret = xe_gt_tlb_invalidation_vma(tile->media_gt, - &fence[fence_id], vma); - if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); - goto wait; - } - ++fence_id; - } - } - -wait: - for (id = 0; id < fence_id; ++id) - xe_gt_tlb_invalidation_fence_wait(&fence[id]); - - vma->tile_invalidated = vma->tile_mask; - - return ret; -} - -struct xe_vm_snapshot { - unsigned long num_snaps; - struct { - u64 ofs, bo_ofs; - unsigned long len; - struct xe_bo *bo; - void *data; - struct mm_struct *mm; - } snap[]; -}; - -struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) -{ - unsigned long num_snaps = 0, i; - struct xe_vm_snapshot *snap = NULL; - struct drm_gpuva *gpuva; - - if (!vm) - return NULL; - - mutex_lock(&vm->snap_mutex); - drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { - if (gpuva->flags & XE_VMA_DUMPABLE) - num_snaps++; - } - - if (num_snaps) - snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); - if (!snap) { - snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV); - goto out_unlock; - } - - snap->num_snaps = num_snaps; - i = 0; - drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { - struct xe_vma *vma = gpuva_to_vma(gpuva); - struct xe_bo *bo = vma->gpuva.gem.obj ? - gem_to_xe_bo(vma->gpuva.gem.obj) : NULL; - - if (!(gpuva->flags & XE_VMA_DUMPABLE)) - continue; - - snap->snap[i].ofs = xe_vma_start(vma); - snap->snap[i].len = xe_vma_size(vma); - if (bo) { - snap->snap[i].bo = xe_bo_get(bo); - snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); - } else if (xe_vma_is_userptr(vma)) { - struct mm_struct *mm = - to_userptr_vma(vma)->userptr.notifier.mm; - - if (mmget_not_zero(mm)) - snap->snap[i].mm = mm; - else - snap->snap[i].data = ERR_PTR(-EFAULT); - - snap->snap[i].bo_ofs = xe_vma_userptr(vma); - } else { - snap->snap[i].data = ERR_PTR(-ENOENT); - } - i++; - } - -out_unlock: - mutex_unlock(&vm->snap_mutex); - return snap; -} - -void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) -{ - if (IS_ERR_OR_NULL(snap)) - return; - - for (int i = 0; i < snap->num_snaps; i++) { - struct xe_bo *bo = snap->snap[i].bo; - struct iosys_map src; - int err; - - if (IS_ERR(snap->snap[i].data)) - continue; - - snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); - if (!snap->snap[i].data) { - snap->snap[i].data = ERR_PTR(-ENOMEM); - goto cleanup_bo; - } - - if (bo) { - xe_bo_lock(bo, false); - err = ttm_bo_vmap(&bo->ttm, &src); - if (!err) { - xe_map_memcpy_from(xe_bo_device(bo), - snap->snap[i].data, - &src, snap->snap[i].bo_ofs, - snap->snap[i].len); - ttm_bo_vunmap(&bo->ttm, &src); - } - xe_bo_unlock(bo); - } else { - void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; - - kthread_use_mm(snap->snap[i].mm); - if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len)) - err = 0; - else - err = -EFAULT; - kthread_unuse_mm(snap->snap[i].mm); - - mmput(snap->snap[i].mm); - snap->snap[i].mm = NULL; - } - - if (err) { - kvfree(snap->snap[i].data); - snap->snap[i].data = ERR_PTR(err); - } - -cleanup_bo: - xe_bo_put(bo); - snap->snap[i].bo = NULL; - } -} - -void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) -{ - unsigned long i, j; - - if (IS_ERR_OR_NULL(snap)) { - drm_printf(p, "[0].error: %li\n", PTR_ERR(snap)); - return; - } - - for (i = 0; i < snap->num_snaps; i++) { - drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len); - - if (IS_ERR(snap->snap[i].data)) { - drm_printf(p, "[%llx].error: %li\n", snap->snap[i].ofs, - PTR_ERR(snap->snap[i].data)); - continue; - } - - drm_printf(p, "[%llx].data: ", snap->snap[i].ofs); - - for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { - u32 *val = snap->snap[i].data + j; - char dumped[ASCII85_BUFSZ]; - - drm_puts(p, ascii85_encode(*val, dumped)); - } - - drm_puts(p, "\n"); - } -} - -void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) -{ - unsigned long i; - - if (IS_ERR_OR_NULL(snap)) - return; - - for (i = 0; i < snap->num_snaps; i++) { - if (!IS_ERR(snap->snap[i].data)) - kvfree(snap->snap[i].data); - xe_bo_put(snap->snap[i].bo); - if (snap->snap[i].mm) - mmput(snap->snap[i].mm); - } - kvfree(snap); -} diff --git a/rr-cache/b29c5b34d90da6afa9207dac9fd5f3a99ae72181/preimage b/rr-cache/b29c5b34d90da6afa9207dac9fd5f3a99ae72181/preimage deleted file mode 100644 index ad7125a258b1..000000000000 --- a/rr-cache/b29c5b34d90da6afa9207dac9fd5f3a99ae72181/preimage +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -/* - * Copyright © 2023 Intel Corporation - */ - -#ifndef _XE_GT_TLB_INVALIDATION_H_ -#define _XE_GT_TLB_INVALIDATION_H_ - -#include <linux/types.h> - -#include "xe_gt_tlb_invalidation_types.h" - -struct xe_gt; -struct xe_guc; -struct xe_vma; - -int xe_gt_tlb_invalidation_init(struct xe_gt *gt); -void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); -int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma); -int xe_gt_tlb_invalidation_range(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - u64 start, u64 end, u32 asid); -int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); - -void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, -<<<<<<< - struct xe_gt_tlb_invalidation_fence *fence); -======= - struct xe_gt_tlb_invalidation_fence *fence, - bool stack); -void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence); - -static inline void -xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) -{ - dma_fence_wait(&fence->base, false); -} ->>>>>>> - -#endif /* _XE_GT_TLB_INVALIDATION_ */ diff --git a/rr-cache/bc3bc775dae1d19bda4dd320b9463ba777208695/preimage b/rr-cache/bc3bc775dae1d19bda4dd320b9463ba777208695/preimage deleted file mode 100644 index bbb3bac849b2..000000000000 --- a/rr-cache/bc3bc775dae1d19bda4dd320b9463ba777208695/preimage +++ /dev/null @@ -1,2102 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include <linux/dma-fence-array.h> - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_exec_queue.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_sched_job.h" -#include "xe_sync.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... - */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? - */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. - * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. - */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - entry->pt_bo->update_index = -1; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. - */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. */ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. - */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. - */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. - */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. - */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. - * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. - */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (!xe_vma_is_userptr(vma) && !xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) - continue; - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - } -} - -static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i; - - for (i = 0; i < num_entries; i++) - kfree(entries[i].pt_entries); -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries, bool bind) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%s: %u entries to update\n", bind ? "bind" : "unbind", - num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) -{ - int i; - - for (i = 0; i < num_syncs; i++) { - struct dma_fence *fence = syncs[i].fence; - - if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &fence->flags)) - return false; - } - - return true; -} - -static int job_test_add_deps(struct xe_sched_job *job, - struct dma_resv *resv, - enum dma_resv_usage usage) -{ - if (!job) { - if (!dma_resv_test_signaled(resv, usage)) - return -ETIME; - - return 0; - } - - return xe_sched_job_add_deps(job, resv, usage); -} - -static int vma_add_deps(struct xe_vma *vma, struct xe_sched_job *job) -{ - struct xe_bo *bo = xe_vma_bo(vma); - - xe_bo_assert_held(bo); - - if (bo && !bo->vm) - return job_test_add_deps(job, bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL); - - return 0; -} - -static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_sched_job *job) -{ - int err = 0; - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = vma_add_deps(op->map.vma, job); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - err = vma_add_deps(op->remap.prev, job); - if (!err && op->remap.next) - err = vma_add_deps(op->remap.next, job); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_vm *vm, - struct xe_vma_ops *vops, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_range_fence_tree *rftree) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - struct xe_vma_op *op; - int err = 0, i; - - xe_vm_assert_held(vm); - - if (!job && !no_in_syncs(vops->syncs, vops->num_syncs)) - return -ETIME; - - if (!job && !xe_exec_queue_is_idle(pt_update_ops->q)) - return -ETIME; - - if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) { - err = job_test_add_deps(job, xe_vm_resv(vm), - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_BOOKKEEP : - DMA_RESV_USAGE_KERNEL); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start, - pt_update_ops->last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, - pt_update_ops->start, - pt_update_ops->last); - } - - list_for_each_entry(op, &vops->list, link) { - err = op_add_deps(vm, op, job); - if (err) - return err; - } - - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); - - for (i = 0; job && !err && i < vops->num_syncs; i++) - err = xe_sync_entry_add_deps(&vops->syncs[i], job); - - return err; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm *vm = vops->vm; - struct xe_range_fence_tree *rftree = &vm->rftree[pt_update->tile_id]; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops, - pt_update_ops, rftree); -} - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - uvma->userptr.divisor = divisor << 1; - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vm_pgtable_update_ops *pt_update) -{ - struct xe_userptr_vma *uvma; - unsigned long notifier_seq; - - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - if (!xe_vma_is_userptr(vma)) - return 0; - - uvma = to_userptr_vma(vma); - notifier_seq = uvma->userptr.notifier_seq; - - if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm)) - return 0; - - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq) && - !xe_pt_userptr_inject_eagain(uvma)) - return 0; - - if (xe_vm_in_fault_mode(vm)) { - return -EAGAIN; - } else { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - - if (xe_vm_in_preempt_fence_mode(vm)) { - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - } - } - - return 0; -} - -static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_vm_pgtable_update_ops *pt_update) -{ - int err = 0; - - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = vma_check_userptr(vm, op->map.vma, pt_update); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - err = vma_check_userptr(vm, op->remap.prev, pt_update); - if (!err && op->remap.next) - err = vma_check_userptr(vm, op->remap.next, pt_update); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), - pt_update); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vm *vm = pt_update->vops->vm; - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - struct xe_vma_op *op; - int err; - - err = xe_pt_pre_commit(pt_update); - if (err) - return err; - - down_read(&vm->userptr.notifier_lock); - - list_for_each_entry(op, &vops->list, link) { - err = op_check_userptr(vm, op, pt_update_ops); - if (err) { - up_read(&vm->userptr.notifier_lock); - break; - } - } - - return err; -} - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. - */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. */ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. - */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vm *vm = pt_update->vops->vm; - u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0; - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count << shift); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - struct llist_head *deferred = &pt_update_ops->deferred; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - /* FIXME: Don't commit right away */ - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_bind(vma, pt_op->entries, pt_op->num_entries, - pt_op->rebind, deferred); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - struct llist_head *deferred = &pt_update_ops->deferred; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. - */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - /* FIXME: Don't commit right away */ - xe_pt_commit_unbind(vma, pt_op->entries, pt_op->num_entries, - deferred); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int shift = tile->media_gt ? 1 : 0; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count << shift); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence, - struct dma_fence *fence2) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence, - fence2); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence, fence2); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence, fence2); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * xe_pt_update_ops_run() - Run PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Run PT update operations which includes committing internal PT state changes, - * creating job for PT update operations for leaf insertion / removal, and - * installing job fence in various places. - * - * Return: fence on success, negative ERR_PTR on error. - */ -struct dma_fence * -xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, - }; - - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - - if (!pt_update_ops->current_op) { - xe_tile_assert(tile, xe_vm_in_fault_mode(vm)); - - return dma_fence_get_stub(); - } - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN) - return ERR_PTR(-ENOSPC); -#endif - - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); -<<<<<<< - if (!ifence) - return ERR_PTR(-ENOMEM); -======= - if (!ifence) { - err = -ENOMEM; - goto kill_vm_tile1; - } - if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); - if (!fences) { - err = -ENOMEM; - goto free_ifence; - } - cf = dma_fence_array_alloc(2); - if (!cf) { - err = -ENOMEM; - goto free_ifence; - } - } ->>>>>>> - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - err = -ENOMEM; - goto free_ifence; - } - - fence = xe_migrate_update_pgtables(tile->migrate, &update); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto free_rfence; - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { -<<<<<<< - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, - vm->usm.asid); - if (err) - goto put_fence; - fence = &ifence->base.base; -======= - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - fences[0] = &ifence->base.base; - fences[1] = &mfence->base.base; - dma_fence_array_init(cf, 2, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - fence = &cf->base; - } else { - fence = &ifence->base.base; - } ->>>>>>> - } - - if (!mfence) { - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); - } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); - } - - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); - - return fence; - -put_fence: - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); - dma_fence_put(fence); -free_rfence: - kfree(rfence); -free_ifence: - kfree(cf); - kfree(fences); - kfree(mfence); - kfree(ifence); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - /* FIXME: Not 100% correct */ - for (i = 0; i < pt_update_ops->num_ops; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - if (pt_op->bind) - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - /* FIXME: Just kill VM for now + cleanup PTs */ - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); - xe_vm_kill(vops->vm, false); -} diff --git a/rr-cache/bc3bc775dae1d19bda4dd320b9463ba777208695/preimage.1 b/rr-cache/bc3bc775dae1d19bda4dd320b9463ba777208695/preimage.1 deleted file mode 100644 index bbb3bac849b2..000000000000 --- a/rr-cache/bc3bc775dae1d19bda4dd320b9463ba777208695/preimage.1 +++ /dev/null @@ -1,2102 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include <linux/dma-fence-array.h> - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_exec_queue.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_sched_job.h" -#include "xe_sync.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... - */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? - */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. - * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. - */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - entry->pt_bo->update_index = -1; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. - */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. */ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. - */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. - */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. - */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. - */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. - * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. - */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (!xe_vma_is_userptr(vma) && !xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) - continue; - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - } -} - -static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i; - - for (i = 0; i < num_entries; i++) - kfree(entries[i].pt_entries); -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries, bool bind) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%s: %u entries to update\n", bind ? "bind" : "unbind", - num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) -{ - int i; - - for (i = 0; i < num_syncs; i++) { - struct dma_fence *fence = syncs[i].fence; - - if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &fence->flags)) - return false; - } - - return true; -} - -static int job_test_add_deps(struct xe_sched_job *job, - struct dma_resv *resv, - enum dma_resv_usage usage) -{ - if (!job) { - if (!dma_resv_test_signaled(resv, usage)) - return -ETIME; - - return 0; - } - - return xe_sched_job_add_deps(job, resv, usage); -} - -static int vma_add_deps(struct xe_vma *vma, struct xe_sched_job *job) -{ - struct xe_bo *bo = xe_vma_bo(vma); - - xe_bo_assert_held(bo); - - if (bo && !bo->vm) - return job_test_add_deps(job, bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL); - - return 0; -} - -static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_sched_job *job) -{ - int err = 0; - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = vma_add_deps(op->map.vma, job); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - err = vma_add_deps(op->remap.prev, job); - if (!err && op->remap.next) - err = vma_add_deps(op->remap.next, job); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_vm *vm, - struct xe_vma_ops *vops, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_range_fence_tree *rftree) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - struct xe_vma_op *op; - int err = 0, i; - - xe_vm_assert_held(vm); - - if (!job && !no_in_syncs(vops->syncs, vops->num_syncs)) - return -ETIME; - - if (!job && !xe_exec_queue_is_idle(pt_update_ops->q)) - return -ETIME; - - if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) { - err = job_test_add_deps(job, xe_vm_resv(vm), - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_BOOKKEEP : - DMA_RESV_USAGE_KERNEL); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start, - pt_update_ops->last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, - pt_update_ops->start, - pt_update_ops->last); - } - - list_for_each_entry(op, &vops->list, link) { - err = op_add_deps(vm, op, job); - if (err) - return err; - } - - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); - - for (i = 0; job && !err && i < vops->num_syncs; i++) - err = xe_sync_entry_add_deps(&vops->syncs[i], job); - - return err; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm *vm = vops->vm; - struct xe_range_fence_tree *rftree = &vm->rftree[pt_update->tile_id]; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops, - pt_update_ops, rftree); -} - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - uvma->userptr.divisor = divisor << 1; - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vm_pgtable_update_ops *pt_update) -{ - struct xe_userptr_vma *uvma; - unsigned long notifier_seq; - - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - if (!xe_vma_is_userptr(vma)) - return 0; - - uvma = to_userptr_vma(vma); - notifier_seq = uvma->userptr.notifier_seq; - - if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm)) - return 0; - - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq) && - !xe_pt_userptr_inject_eagain(uvma)) - return 0; - - if (xe_vm_in_fault_mode(vm)) { - return -EAGAIN; - } else { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - - if (xe_vm_in_preempt_fence_mode(vm)) { - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - } - } - - return 0; -} - -static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_vm_pgtable_update_ops *pt_update) -{ - int err = 0; - - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = vma_check_userptr(vm, op->map.vma, pt_update); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - err = vma_check_userptr(vm, op->remap.prev, pt_update); - if (!err && op->remap.next) - err = vma_check_userptr(vm, op->remap.next, pt_update); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), - pt_update); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vm *vm = pt_update->vops->vm; - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - struct xe_vma_op *op; - int err; - - err = xe_pt_pre_commit(pt_update); - if (err) - return err; - - down_read(&vm->userptr.notifier_lock); - - list_for_each_entry(op, &vops->list, link) { - err = op_check_userptr(vm, op, pt_update_ops); - if (err) { - up_read(&vm->userptr.notifier_lock); - break; - } - } - - return err; -} - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. - */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. */ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. - */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vm *vm = pt_update->vops->vm; - u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0; - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count << shift); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - struct llist_head *deferred = &pt_update_ops->deferred; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - /* FIXME: Don't commit right away */ - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_bind(vma, pt_op->entries, pt_op->num_entries, - pt_op->rebind, deferred); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - struct llist_head *deferred = &pt_update_ops->deferred; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. - */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - /* FIXME: Don't commit right away */ - xe_pt_commit_unbind(vma, pt_op->entries, pt_op->num_entries, - deferred); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int shift = tile->media_gt ? 1 : 0; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count << shift); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence, - struct dma_fence *fence2) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence, - fence2); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence, fence2); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence, fence2); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * xe_pt_update_ops_run() - Run PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Run PT update operations which includes committing internal PT state changes, - * creating job for PT update operations for leaf insertion / removal, and - * installing job fence in various places. - * - * Return: fence on success, negative ERR_PTR on error. - */ -struct dma_fence * -xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, - }; - - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - - if (!pt_update_ops->current_op) { - xe_tile_assert(tile, xe_vm_in_fault_mode(vm)); - - return dma_fence_get_stub(); - } - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN) - return ERR_PTR(-ENOSPC); -#endif - - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); -<<<<<<< - if (!ifence) - return ERR_PTR(-ENOMEM); -======= - if (!ifence) { - err = -ENOMEM; - goto kill_vm_tile1; - } - if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); - if (!fences) { - err = -ENOMEM; - goto free_ifence; - } - cf = dma_fence_array_alloc(2); - if (!cf) { - err = -ENOMEM; - goto free_ifence; - } - } ->>>>>>> - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - err = -ENOMEM; - goto free_ifence; - } - - fence = xe_migrate_update_pgtables(tile->migrate, &update); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto free_rfence; - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { -<<<<<<< - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, - vm->usm.asid); - if (err) - goto put_fence; - fence = &ifence->base.base; -======= - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - fences[0] = &ifence->base.base; - fences[1] = &mfence->base.base; - dma_fence_array_init(cf, 2, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - fence = &cf->base; - } else { - fence = &ifence->base.base; - } ->>>>>>> - } - - if (!mfence) { - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); - } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); - } - - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); - - return fence; - -put_fence: - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); - dma_fence_put(fence); -free_rfence: - kfree(rfence); -free_ifence: - kfree(cf); - kfree(fences); - kfree(mfence); - kfree(ifence); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - /* FIXME: Not 100% correct */ - for (i = 0; i < pt_update_ops->num_ops; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - if (pt_op->bind) - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - /* FIXME: Just kill VM for now + cleanup PTs */ - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); - xe_vm_kill(vops->vm, false); -} diff --git a/rr-cache/bc3bc775dae1d19bda4dd320b9463ba777208695/preimage.2 b/rr-cache/bc3bc775dae1d19bda4dd320b9463ba777208695/preimage.2 deleted file mode 100644 index bbb3bac849b2..000000000000 --- a/rr-cache/bc3bc775dae1d19bda4dd320b9463ba777208695/preimage.2 +++ /dev/null @@ -1,2102 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include <linux/dma-fence-array.h> - -#include "xe_pt.h" - -#include "regs/xe_gtt_defs.h" -#include "xe_bo.h" -#include "xe_device.h" -#include "xe_drm_client.h" -#include "xe_exec_queue.h" -#include "xe_gt.h" -#include "xe_gt_tlb_invalidation.h" -#include "xe_migrate.h" -#include "xe_pt_types.h" -#include "xe_pt_walk.h" -#include "xe_res_cursor.h" -#include "xe_sched_job.h" -#include "xe_sync.h" -#include "xe_trace.h" -#include "xe_ttm_stolen_mgr.h" -#include "xe_vm.h" - -struct xe_pt_dir { - struct xe_pt pt; - /** @children: Array of page-table child nodes */ - struct xe_ptw *children[XE_PDES]; -}; - -#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) -#define xe_pt_set_addr(__xe_pt, __addr) ((__xe_pt)->addr = (__addr)) -#define xe_pt_addr(__xe_pt) ((__xe_pt)->addr) -#else -#define xe_pt_set_addr(__xe_pt, __addr) -#define xe_pt_addr(__xe_pt) 0ull -#endif - -static const u64 xe_normal_pt_shifts[] = {12, 21, 30, 39, 48}; -static const u64 xe_compact_pt_shifts[] = {16, 21, 30, 39, 48}; - -#define XE_PT_HIGHEST_LEVEL (ARRAY_SIZE(xe_normal_pt_shifts) - 1) - -static struct xe_pt_dir *as_xe_pt_dir(struct xe_pt *pt) -{ - return container_of(pt, struct xe_pt_dir, pt); -} - -static struct xe_pt *xe_pt_entry(struct xe_pt_dir *pt_dir, unsigned int index) -{ - return container_of(pt_dir->children[index], struct xe_pt, base); -} - -static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm, - unsigned int level) -{ - struct xe_device *xe = tile_to_xe(tile); - u16 pat_index = xe->pat.idx[XE_CACHE_WB]; - u8 id = tile->id; - - if (!xe_vm_has_scratch(vm)) - return 0; - - if (level > MAX_HUGEPTE_LEVEL) - return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo, - 0, pat_index); - - return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) | - XE_PTE_NULL; -} - -static void xe_pt_free(struct xe_pt *pt) -{ - if (pt->level) - kfree(as_xe_pt_dir(pt)); - else - kfree(pt); -} - -/** - * xe_pt_create() - Create a page-table. - * @vm: The vm to create for. - * @tile: The tile to create for. - * @level: The page-table level. - * - * Allocate and initialize a single struct xe_pt metadata structure. Also - * create the corresponding page-table bo, but don't initialize it. If the - * level is grater than zero, then it's assumed to be a directory page- - * table and the directory structure is also allocated and initialized to - * NULL pointers. - * - * Return: A valid struct xe_pt pointer on success, Pointer error code on - * error. - */ -struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile, - unsigned int level) -{ - struct xe_pt *pt; - struct xe_bo *bo; - int err; - - if (level) { - struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL); - - pt = (dir) ? &dir->pt : NULL; - } else { - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - } - if (!pt) - return ERR_PTR(-ENOMEM); - - pt->level = level; - bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K, - ttm_bo_type_kernel, - XE_BO_FLAG_VRAM_IF_DGFX(tile) | - XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE | - XE_BO_FLAG_PINNED | - XE_BO_FLAG_NO_RESV_EVICT | - XE_BO_FLAG_PAGETABLE); - if (IS_ERR(bo)) { - err = PTR_ERR(bo); - goto err_kfree; - } - pt->bo = bo; - pt->base.children = level ? as_xe_pt_dir(pt)->children : NULL; - - if (vm->xef) - xe_drm_client_add_bo(vm->xef->client, pt->bo); - xe_tile_assert(tile, level <= XE_VM_MAX_LEVEL); - - return pt; - -err_kfree: - xe_pt_free(pt); - return ERR_PTR(err); -} - -/** - * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero - * entries. - * @tile: The tile the scratch pagetable of which to use. - * @vm: The vm we populate for. - * @pt: The pagetable the bo of which to initialize. - * - * Populate the page-table bo of @pt with entries pointing into the tile's - * scratch page-table tree if any. Otherwise populate with zeros. - */ -void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm, - struct xe_pt *pt) -{ - struct iosys_map *map = &pt->bo->vmap; - u64 empty; - int i; - - if (!xe_vm_has_scratch(vm)) { - /* - * FIXME: Some memory is allocated already allocated to zero? - * Find out which memory that is and avoid this memset... - */ - xe_map_memset(vm->xe, map, 0, 0, SZ_4K); - } else { - empty = __xe_pt_empty_pte(tile, vm, pt->level); - for (i = 0; i < XE_PDES; i++) - xe_pt_write(vm->xe, map, i, empty); - } -} - -/** - * xe_pt_shift() - Return the ilog2 value of the size of the address range of - * a page-table at a certain level. - * @level: The level. - * - * Return: The ilog2 value of the size of the address range of a page-table - * at level @level. - */ -unsigned int xe_pt_shift(unsigned int level) -{ - return XE_PTE_SHIFT + XE_PDE_SHIFT * level; -} - -/** - * xe_pt_destroy() - Destroy a page-table tree. - * @pt: The root of the page-table tree to destroy. - * @flags: vm flags. Currently unused. - * @deferred: List head of lockless list for deferred putting. NULL for - * immediate putting. - * - * Puts the page-table bo, recursively calls xe_pt_destroy on all children - * and finally frees @pt. TODO: Can we remove the @flags argument? - */ -void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head *deferred) -{ - int i; - - if (!pt) - return; - - XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list)); - xe_bo_unpin(pt->bo); - xe_bo_put_deferred(pt->bo, deferred); - - if (pt->level > 0 && pt->num_live) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - - for (i = 0; i < XE_PDES; i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), flags, - deferred); - } - } - xe_pt_free(pt); -} - -/** - * DOC: Pagetable building - * - * Below we use the term "page-table" for both page-directories, containing - * pointers to lower level page-directories or page-tables, and level 0 - * page-tables that contain only page-table-entries pointing to memory pages. - * - * When inserting an address range in an already existing page-table tree - * there will typically be a set of page-tables that are shared with other - * address ranges, and a set that are private to this address range. - * The set of shared page-tables can be at most two per level, - * and those can't be updated immediately because the entries of those - * page-tables may still be in use by the gpu for other mappings. Therefore - * when inserting entries into those, we instead stage those insertions by - * adding insertion data into struct xe_vm_pgtable_update structures. This - * data, (subtrees for the cpu and page-table-entries for the gpu) is then - * added in a separate commit step. CPU-data is committed while still under the - * vm lock, the object lock and for userptr, the notifier lock in read mode. - * The GPU async data is committed either by the GPU or CPU after fulfilling - * relevant dependencies. - * For non-shared page-tables (and, in fact, for shared ones that aren't - * existing at the time of staging), we add the data in-place without the - * special update structures. This private part of the page-table tree will - * remain disconnected from the vm page-table tree until data is committed to - * the shared page tables of the vm tree in the commit phase. - */ - -struct xe_pt_update { - /** @update: The update structure we're building for this parent. */ - struct xe_vm_pgtable_update *update; - /** @parent: The parent. Used to detect a parent change. */ - struct xe_pt *parent; - /** @preexisting: Whether the parent was pre-existing or allocated */ - bool preexisting; -}; - -struct xe_pt_stage_bind_walk { - /** base: The base class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @vm: The vm we're building for. */ - struct xe_vm *vm; - /** @tile: The tile we're building for. */ - struct xe_tile *tile; - /** @default_pte: PTE flag only template. No address is associated */ - u64 default_pte; - /** @dma_offset: DMA offset to add to the PTE. */ - u64 dma_offset; - /** - * @needs_64k: This address range enforces 64K alignment and - * granularity. - */ - bool needs_64K; - /** - * @vma: VMA being mapped - */ - struct xe_vma *vma; - - /* Also input, but is updated during the walk*/ - /** @curs: The DMA address cursor. */ - struct xe_res_cursor *curs; - /** @va_curs_start: The Virtual address coresponding to @curs->start */ - u64 va_curs_start; - - /* Output */ - struct xe_walk_update { - /** @wupd.entries: Caller provided storage. */ - struct xe_vm_pgtable_update *entries; - /** @wupd.num_used_entries: Number of update @entries used. */ - unsigned int num_used_entries; - /** @wupd.updates: Tracks the update entry at a given level */ - struct xe_pt_update updates[XE_VM_MAX_LEVEL + 1]; - } wupd; - - /* Walk state */ - /** - * @l0_end_addr: The end address of the current l0 leaf. Used for - * 64K granularity detection. - */ - u64 l0_end_addr; - /** @addr_64K: The start address of the current 64K chunk. */ - u64 addr_64K; - /** @found_64: Whether @add_64K actually points to a 64K chunk. */ - bool found_64K; -}; - -static int -xe_pt_new_shared(struct xe_walk_update *wupd, struct xe_pt *parent, - pgoff_t offset, bool alloc_entries) -{ - struct xe_pt_update *upd = &wupd->updates[parent->level]; - struct xe_vm_pgtable_update *entry; - - /* - * For *each level*, we could only have one active - * struct xt_pt_update at any one time. Once we move on to a - * new parent and page-directory, the old one is complete, and - * updates are either already stored in the build tree or in - * @wupd->entries - */ - if (likely(upd->parent == parent)) - return 0; - - upd->parent = parent; - upd->preexisting = true; - - if (wupd->num_used_entries == XE_VM_MAX_LEVEL * 2 + 1) - return -EINVAL; - - entry = wupd->entries + wupd->num_used_entries++; - upd->update = entry; - entry->ofs = offset; - entry->pt_bo = parent->bo; - entry->pt = parent; - entry->flags = 0; - entry->qwords = 0; - entry->pt_bo->update_index = -1; - - if (alloc_entries) { - entry->pt_entries = kmalloc_array(XE_PDES, - sizeof(*entry->pt_entries), - GFP_KERNEL); - if (!entry->pt_entries) - return -ENOMEM; - } - - return 0; -} - -/* - * NOTE: This is a very frequently called function so we allow ourselves - * to annotate (using branch prediction hints) the fastpath of updating a - * non-pre-existing pagetable with leaf ptes. - */ -static int -xe_pt_insert_entry(struct xe_pt_stage_bind_walk *xe_walk, struct xe_pt *parent, - pgoff_t offset, struct xe_pt *xe_child, u64 pte) -{ - struct xe_pt_update *upd = &xe_walk->wupd.updates[parent->level]; - struct xe_pt_update *child_upd = xe_child ? - &xe_walk->wupd.updates[xe_child->level] : NULL; - int ret; - - ret = xe_pt_new_shared(&xe_walk->wupd, parent, offset, true); - if (unlikely(ret)) - return ret; - - /* - * Register this new pagetable so that it won't be recognized as - * a shared pagetable by a subsequent insertion. - */ - if (unlikely(child_upd)) { - child_upd->update = NULL; - child_upd->parent = xe_child; - child_upd->preexisting = false; - } - - if (likely(!upd->preexisting)) { - /* Continue building a non-connected subtree. */ - struct iosys_map *map = &parent->bo->vmap; - - if (unlikely(xe_child)) - parent->base.children[offset] = &xe_child->base; - - xe_pt_write(xe_walk->vm->xe, map, offset, pte); - parent->num_live++; - } else { - /* Shared pt. Stage update. */ - unsigned int idx; - struct xe_vm_pgtable_update *entry = upd->update; - - idx = offset - entry->ofs; - entry->pt_entries[idx].pt = xe_child; - entry->pt_entries[idx].pte = pte; - entry->qwords++; - } - - return 0; -} - -static bool xe_pt_hugepte_possible(u64 addr, u64 next, unsigned int level, - struct xe_pt_stage_bind_walk *xe_walk) -{ - u64 size, dma; - - if (level > MAX_HUGEPTE_LEVEL) - return false; - - /* Does the virtual range requested cover a huge pte? */ - if (!xe_pt_covers(addr, next, level, &xe_walk->base)) - return false; - - /* Does the DMA segment cover the whole pte? */ - if (next - xe_walk->va_curs_start > xe_walk->curs->size) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - /* Is the DMA address huge PTE size aligned? */ - size = next - addr; - dma = addr - xe_walk->va_curs_start + xe_res_dma(xe_walk->curs); - - return IS_ALIGNED(dma, size); -} - -/* - * Scan the requested mapping to check whether it can be done entirely - * with 64K PTEs. - */ -static bool -xe_pt_scan_64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - struct xe_res_cursor curs = *xe_walk->curs; - - if (!IS_ALIGNED(addr, SZ_64K)) - return false; - - if (next > xe_walk->l0_end_addr) - return false; - - /* null VMA's do not have dma addresses */ - if (xe_vma_is_null(xe_walk->vma)) - return true; - - xe_res_next(&curs, addr - xe_walk->va_curs_start); - for (; addr < next; addr += SZ_64K) { - if (!IS_ALIGNED(xe_res_dma(&curs), SZ_64K) || curs.size < SZ_64K) - return false; - - xe_res_next(&curs, SZ_64K); - } - - return addr == next; -} - -/* - * For non-compact "normal" 4K level-0 pagetables, we want to try to group - * addresses together in 64K-contigous regions to add a 64K TLB hint for the - * device to the PTE. - * This function determines whether the address is part of such a - * segment. For VRAM in normal pagetables, this is strictly necessary on - * some devices. - */ -static bool -xe_pt_is_pte_ps64K(u64 addr, u64 next, struct xe_pt_stage_bind_walk *xe_walk) -{ - /* Address is within an already found 64k region */ - if (xe_walk->found_64K && addr - xe_walk->addr_64K < SZ_64K) - return true; - - xe_walk->found_64K = xe_pt_scan_64K(addr, addr + SZ_64K, xe_walk); - xe_walk->addr_64K = addr; - - return xe_walk->found_64K; -} - -static int -xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_bind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - u16 pat_index = xe_walk->vma->pat_index; - struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base); - struct xe_vm *vm = xe_walk->vm; - struct xe_pt *xe_child; - bool covers; - int ret = 0; - u64 pte; - - /* Is this a leaf entry ?*/ - if (level == 0 || xe_pt_hugepte_possible(addr, next, level, xe_walk)) { - struct xe_res_cursor *curs = xe_walk->curs; - bool is_null = xe_vma_is_null(xe_walk->vma); - - XE_WARN_ON(xe_walk->va_curs_start != addr); - - pte = vm->pt_ops->pte_encode_vma(is_null ? 0 : - xe_res_dma(curs) + xe_walk->dma_offset, - xe_walk->vma, pat_index, level); - pte |= xe_walk->default_pte; - - /* - * Set the XE_PTE_PS64 hint if possible, otherwise if - * this device *requires* 64K PTE size for VRAM, fail. - */ - if (level == 0 && !xe_parent->is_compact) { - if (xe_pt_is_pte_ps64K(addr, next, xe_walk)) { - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_64K; - pte |= XE_PTE_PS64; - } else if (XE_WARN_ON(xe_walk->needs_64K)) { - return -EINVAL; - } - } - - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, NULL, pte); - if (unlikely(ret)) - return ret; - - if (!is_null) - xe_res_next(curs, next - addr); - xe_walk->va_curs_start = next; - xe_walk->vma->gpuva.flags |= (XE_VMA_PTE_4K << level); - *action = ACTION_CONTINUE; - - return ret; - } - - /* - * Descending to lower level. Determine if we need to allocate a - * new page table or -directory, which we do if there is no - * previous one or there is one we can completely replace. - */ - if (level == 1) { - walk->shifts = xe_normal_pt_shifts; - xe_walk->l0_end_addr = next; - } - - covers = xe_pt_covers(addr, next, level, &xe_walk->base); - if (covers || !*child) { - u64 flags = 0; - - xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1); - if (IS_ERR(xe_child)) - return PTR_ERR(xe_child); - - xe_pt_set_addr(xe_child, - round_down(addr, 1ull << walk->shifts[level])); - - if (!covers) - xe_pt_populate_empty(xe_walk->tile, xe_walk->vm, xe_child); - - *child = &xe_child->base; - - /* - * Prefer the compact pagetable layout for L0 if possible. Only - * possible if VMA covers entire 2MB region as compact 64k and - * 4k pages cannot be mixed within a 2MB region. - * TODO: Suballocate the pt bo to avoid wasting a lot of - * memory. - */ - if (GRAPHICS_VERx100(tile_to_xe(xe_walk->tile)) >= 1250 && level == 1 && - covers && xe_pt_scan_64K(addr, next, xe_walk)) { - walk->shifts = xe_compact_pt_shifts; - xe_walk->vma->gpuva.flags |= XE_VMA_PTE_COMPACT; - flags |= XE_PDE_64K; - xe_child->is_compact = true; - } - - pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags; - ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child, - pte); - } - - *action = ACTION_SUBTREE; - return ret; -} - -static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = { - .pt_entry = xe_pt_stage_bind_entry, -}; - -/** - * xe_pt_stage_bind() - Build a disconnected page-table tree for a given address - * range. - * @tile: The tile we're building for. - * @vma: The vma indicating the address range. - * @entries: Storage for the update entries used for connecting the tree to - * the main tree at commit time. - * @num_entries: On output contains the number of @entries used. - * - * This function builds a disconnected page-table tree for a given address - * range. The tree is connected to the main vm tree for the gpu using - * xe_migrate_update_pgtables() and for the cpu using xe_pt_commit_bind(). - * The function builds xe_vm_pgtable_update structures for already existing - * shared page-tables, and non-existing shared and non-shared page-tables - * are built and populated directly. - * - * Return 0 on success, negative error code on error. - */ -static int -xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - struct xe_device *xe = tile_to_xe(tile); - struct xe_bo *bo = xe_vma_bo(vma); - bool is_devmem = !xe_vma_is_userptr(vma) && bo && - (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)); - struct xe_res_cursor curs; - struct xe_pt_stage_bind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_bind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .vm = xe_vma_vm(vma), - .tile = tile, - .curs = &curs, - .va_curs_start = xe_vma_start(vma), - .vma = vma, - .wupd.entries = entries, - .needs_64K = (xe_vma_vm(vma)->flags & XE_VM_FLAG_64K) && is_devmem, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - int ret; - - /** - * Default atomic expectations for different allocation scenarios are as follows: - * - * 1. Traditional API: When the VM is not in LR mode: - * - Device atomics are expected to function with all allocations. - * - * 2. Compute/SVM API: When the VM is in LR mode: - * - Device atomics are the default behavior when the bo is placed in a single region. - * - In all other cases device atomics will be disabled with AE=0 until an application - * request differently using a ioctl like madvise. - */ - if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) { - if (xe_vm_in_lr_mode(xe_vma_vm(vma))) { - if (bo && xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - /** - * If a SMEM+LMEM allocation is backed by SMEM, a device - * atomics will cause a gpu page fault and which then - * gets migrated to LMEM, bind such allocations with - * device atomics enabled. - */ - else if (is_devmem && !xe_bo_has_single_placement(bo)) - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } else { - xe_walk.default_pte |= XE_USM_PPGTT_PTE_AE; - } - - /** - * Unset AE if the platform(PVC) doesn't support it on an - * allocation - */ - if (!xe->info.has_device_atomics_on_smem && !is_devmem) - xe_walk.default_pte &= ~XE_USM_PPGTT_PTE_AE; - } - - if (is_devmem) { - xe_walk.default_pte |= XE_PPGTT_PTE_DM; - xe_walk.dma_offset = vram_region_gpu_offset(bo->ttm.resource); - } - - if (!xe_vma_has_no_bo(vma) && xe_bo_is_stolen(bo)) - xe_walk.dma_offset = xe_ttm_stolen_gpu_offset(xe_bo_device(bo)); - - xe_bo_assert_held(bo); - - if (!xe_vma_is_null(vma)) { - if (xe_vma_is_userptr(vma)) - xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0, - xe_vma_size(vma), &curs); - else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo)) - xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - else - xe_res_first_sg(xe_bo_sg(bo), xe_vma_bo_offset(vma), - xe_vma_size(vma), &curs); - } else { - curs.size = xe_vma_size(vma); - } - - ret = xe_pt_walk_range(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - *num_entries = xe_walk.wupd.num_used_entries; - return ret; -} - -/** - * xe_pt_nonshared_offsets() - Determine the non-shared entry offsets of a - * shared pagetable. - * @addr: The start address within the non-shared pagetable. - * @end: The end address within the non-shared pagetable. - * @level: The level of the non-shared pagetable. - * @walk: Walk info. The function adjusts the walk action. - * @action: next action to perform (see enum page_walk_action) - * @offset: Ignored on input, First non-shared entry on output. - * @end_offset: Ignored on input, Last non-shared entry + 1 on output. - * - * A non-shared page-table has some entries that belong to the address range - * and others that don't. This function determines the entries that belong - * fully to the address range. Depending on level, some entries may - * partially belong to the address range (that can't happen at level 0). - * The function detects that and adjust those offsets to not include those - * partial entries. Iff it does detect partial entries, we know that there must - * be shared page tables also at lower levels, so it adjusts the walk action - * accordingly. - * - * Return: true if there were non-shared entries, false otherwise. - */ -static bool xe_pt_nonshared_offsets(u64 addr, u64 end, unsigned int level, - struct xe_pt_walk *walk, - enum page_walk_action *action, - pgoff_t *offset, pgoff_t *end_offset) -{ - u64 size = 1ull << walk->shifts[level]; - - *offset = xe_pt_offset(addr, level, walk); - *end_offset = xe_pt_num_entries(addr, end, level, walk) + *offset; - - if (!level) - return true; - - /* - * If addr or next are not size aligned, there are shared pts at lower - * level, so in that case traverse down the subtree - */ - *action = ACTION_CONTINUE; - if (!IS_ALIGNED(addr, size)) { - *action = ACTION_SUBTREE; - (*offset)++; - } - - if (!IS_ALIGNED(end, size)) { - *action = ACTION_SUBTREE; - (*end_offset)--; - } - - return *end_offset > *offset; -} - -struct xe_pt_zap_ptes_walk { - /** @base: The walk base-class */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're building for */ - struct xe_tile *tile; - - /* Output */ - /** @needs_invalidate: Whether we need to invalidate TLB*/ - bool needs_invalidate; -}; - -static int xe_pt_zap_ptes_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_zap_ptes_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - /* - * Note that we're called from an entry callback, and we're dealing - * with the child of that entry rather than the parent, so need to - * adjust level down. - */ - if (xe_pt_nonshared_offsets(addr, next, --level, walk, action, &offset, - &end_offset)) { - xe_map_memset(tile_to_xe(xe_walk->tile), &xe_child->bo->vmap, - offset * sizeof(u64), 0, - (end_offset - offset) * sizeof(u64)); - xe_walk->needs_invalidate = true; - } - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_zap_ptes_ops = { - .pt_entry = xe_pt_zap_ptes_entry, -}; - -/** - * xe_pt_zap_ptes() - Zap (zero) gpu ptes of an address range - * @tile: The tile we're zapping for. - * @vma: GPU VMA detailing address range. - * - * Eviction and Userptr invalidation needs to be able to zap the - * gpu ptes of a given address range in pagefaulting mode. - * In order to be able to do that, that function needs access to the shared - * page-table entrieaso it can either clear the leaf PTEs or - * clear the pointers to lower-level page-tables. The caller is required - * to hold the necessary locks to ensure neither the page-table connectivity - * nor the page-table entries of the range is updated from under us. - * - * Return: Whether ptes were actually updated and a TLB invalidation is - * required. - */ -bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma) -{ - struct xe_pt_zap_ptes_walk xe_walk = { - .base = { - .ops = &xe_pt_zap_ptes_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated); - - if (!(pt_mask & BIT(tile->id))) - return false; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.needs_invalidate; -} - -static void -xe_vm_populate_pgtable(struct xe_migrate_pt_update *pt_update, struct xe_tile *tile, - struct iosys_map *map, void *data, - u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_pt_entry *ptes = update->pt_entries; - u64 *ptr = data; - u32 i; - - for (i = 0; i < num_qwords; i++) { - if (map) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, ptes[i].pte); - else - ptr[i] = ptes[i].pte; - } -} - -static void xe_pt_abort_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i, j; - - for (i = 0; i < num_entries; i++) { - if (!entries[i].pt_entries) - continue; - - for (j = 0; j < entries[i].qwords; j++) - xe_pt_destroy(entries[i].pt_entries[j].pt, xe_vma_vm(vma)->flags, NULL); - kfree(entries[i].pt_entries); - } -} - -static void xe_pt_commit_locks_assert(struct xe_vma *vma) -{ - struct xe_vm *vm = xe_vma_vm(vma); - - lockdep_assert_held(&vm->lock); - - if (!xe_vma_is_userptr(vma) && !xe_vma_is_null(vma)) - dma_resv_assert_held(xe_vma_bo(vma)->ttm.base.resv); - - xe_vm_assert_held(vm); -} - -static void xe_pt_commit_bind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, - u32 num_entries, bool rebind, - struct llist_head *deferred) -{ - u32 i, j; - - xe_pt_commit_locks_assert(vma); - - for (i = 0; i < num_entries; i++) { - struct xe_pt *pt = entries[i].pt; - struct xe_pt_dir *pt_dir; - - if (!rebind) - pt->num_live += entries[i].qwords; - - if (!pt->level) - continue; - - pt_dir = as_xe_pt_dir(pt); - for (j = 0; j < entries[i].qwords; j++) { - u32 j_ = j + entries[i].ofs; - struct xe_pt *newpte = entries[i].pt_entries[j].pt; - - if (xe_pt_entry(pt_dir, j_)) - xe_pt_destroy(xe_pt_entry(pt_dir, j_), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[j_] = &newpte->base; - } - } -} - -static void xe_pt_free_bind(struct xe_vm_pgtable_update *entries, - u32 num_entries) -{ - u32 i; - - for (i = 0; i < num_entries; i++) - kfree(entries[i].pt_entries); -} - -static int -xe_pt_prepare_bind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 *num_entries) -{ - int err; - - *num_entries = 0; - err = xe_pt_stage_bind(tile, vma, entries, num_entries); - if (!err) - xe_tile_assert(tile, *num_entries); - else /* abort! */ - xe_pt_abort_bind(vma, entries, *num_entries); - - return err; -} - -static void xe_vm_dbg_print_entries(struct xe_device *xe, - const struct xe_vm_pgtable_update *entries, - unsigned int num_entries, bool bind) -#if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)) -{ - unsigned int i; - - vm_dbg(&xe->drm, "%s: %u entries to update\n", bind ? "bind" : "unbind", - num_entries); - for (i = 0; i < num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &entries[i]; - struct xe_pt *xe_pt = entry->pt; - u64 page_size = 1ull << xe_pt_shift(xe_pt->level); - u64 end; - u64 start; - - xe_assert(xe, !entry->pt->is_compact); - start = entry->ofs * page_size; - end = start + page_size * entry->qwords; - vm_dbg(&xe->drm, - "\t%u: Update level %u at (%u + %u) [%llx...%llx) f:%x\n", - i, xe_pt->level, entry->ofs, entry->qwords, - xe_pt_addr(xe_pt) + start, xe_pt_addr(xe_pt) + end, 0); - } -} -#else -{} -#endif - -static bool no_in_syncs(struct xe_sync_entry *syncs, u32 num_syncs) -{ - int i; - - for (i = 0; i < num_syncs; i++) { - struct dma_fence *fence = syncs[i].fence; - - if (fence && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &fence->flags)) - return false; - } - - return true; -} - -static int job_test_add_deps(struct xe_sched_job *job, - struct dma_resv *resv, - enum dma_resv_usage usage) -{ - if (!job) { - if (!dma_resv_test_signaled(resv, usage)) - return -ETIME; - - return 0; - } - - return xe_sched_job_add_deps(job, resv, usage); -} - -static int vma_add_deps(struct xe_vma *vma, struct xe_sched_job *job) -{ - struct xe_bo *bo = xe_vma_bo(vma); - - xe_bo_assert_held(bo); - - if (bo && !bo->vm) - return job_test_add_deps(job, bo->ttm.base.resv, - DMA_RESV_USAGE_KERNEL); - - return 0; -} - -static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_sched_job *job) -{ - int err = 0; - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = vma_add_deps(op->map.vma, job); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - err = vma_add_deps(op->remap.prev, job); - if (!err && op->remap.next) - err = vma_add_deps(op->remap.next, job); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - err = vma_add_deps(gpuva_to_vma(op->base.prefetch.va), job); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_vm_dependencies(struct xe_sched_job *job, - struct xe_vm *vm, - struct xe_vma_ops *vops, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_range_fence_tree *rftree) -{ - struct xe_range_fence *rtfence; - struct dma_fence *fence; - struct xe_vma_op *op; - int err = 0, i; - - xe_vm_assert_held(vm); - - if (!job && !no_in_syncs(vops->syncs, vops->num_syncs)) - return -ETIME; - - if (!job && !xe_exec_queue_is_idle(pt_update_ops->q)) - return -ETIME; - - if (pt_update_ops->wait_vm_bookkeep || pt_update_ops->wait_vm_kernel) { - err = job_test_add_deps(job, xe_vm_resv(vm), - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_BOOKKEEP : - DMA_RESV_USAGE_KERNEL); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_first(rftree, pt_update_ops->start, - pt_update_ops->last); - while (rtfence) { - fence = rtfence->fence; - - if (!dma_fence_is_signaled(fence)) { - /* - * Is this a CPU update? GPU is busy updating, so return - * an error - */ - if (!job) - return -ETIME; - - dma_fence_get(fence); - err = drm_sched_job_add_dependency(&job->drm, fence); - if (err) - return err; - } - - rtfence = xe_range_fence_tree_next(rtfence, - pt_update_ops->start, - pt_update_ops->last); - } - - list_for_each_entry(op, &vops->list, link) { - err = op_add_deps(vm, op, job); - if (err) - return err; - } - - if (job) - err = xe_sched_job_last_fence_add_dep(job, vm); - else - err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm); - - for (i = 0; job && !err && i < vops->num_syncs; i++) - err = xe_sync_entry_add_deps(&vops->syncs[i], job); - - return err; -} - -static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm *vm = vops->vm; - struct xe_range_fence_tree *rftree = &vm->rftree[pt_update->tile_id]; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - - return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops, - pt_update_ops, rftree); -} - -#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - u32 divisor = uvma->userptr.divisor ? uvma->userptr.divisor : 2; - static u32 count; - - if (count++ % divisor == divisor - 1) { - uvma->userptr.divisor = divisor << 1; - return true; - } - - return false; -} - -#else - -static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma) -{ - return false; -} - -#endif - -static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma, - struct xe_vm_pgtable_update_ops *pt_update) -{ - struct xe_userptr_vma *uvma; - unsigned long notifier_seq; - - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - if (!xe_vma_is_userptr(vma)) - return 0; - - uvma = to_userptr_vma(vma); - notifier_seq = uvma->userptr.notifier_seq; - - if (uvma->userptr.initial_bind && !xe_vm_in_fault_mode(vm)) - return 0; - - if (!mmu_interval_read_retry(&uvma->userptr.notifier, - notifier_seq) && - !xe_pt_userptr_inject_eagain(uvma)) - return 0; - - if (xe_vm_in_fault_mode(vm)) { - return -EAGAIN; - } else { - spin_lock(&vm->userptr.invalidated_lock); - list_move_tail(&uvma->userptr.invalidate_link, - &vm->userptr.invalidated); - spin_unlock(&vm->userptr.invalidated_lock); - - if (xe_vm_in_preempt_fence_mode(vm)) { - struct dma_resv_iter cursor; - struct dma_fence *fence; - long err; - - dma_resv_iter_begin(&cursor, xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP); - dma_resv_for_each_fence_unlocked(&cursor, fence) - dma_fence_enable_sw_signaling(fence); - dma_resv_iter_end(&cursor); - - err = dma_resv_wait_timeout(xe_vm_resv(vm), - DMA_RESV_USAGE_BOOKKEEP, - false, MAX_SCHEDULE_TIMEOUT); - XE_WARN_ON(err <= 0); - } - } - - return 0; -} - -static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op, - struct xe_vm_pgtable_update_ops *pt_update) -{ - int err = 0; - - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = vma_check_userptr(vm, op->map.vma, pt_update); - break; - case DRM_GPUVA_OP_REMAP: - if (op->remap.prev) - err = vma_check_userptr(vm, op->remap.prev, pt_update); - if (!err && op->remap.next) - err = vma_check_userptr(vm, op->remap.next, pt_update); - break; - case DRM_GPUVA_OP_UNMAP: - break; - case DRM_GPUVA_OP_PREFETCH: - err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), - pt_update); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update) -{ - struct xe_vm *vm = pt_update->vops->vm; - struct xe_vma_ops *vops = pt_update->vops; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[pt_update->tile_id]; - struct xe_vma_op *op; - int err; - - err = xe_pt_pre_commit(pt_update); - if (err) - return err; - - down_read(&vm->userptr.notifier_lock); - - list_for_each_entry(op, &vops->list, link) { - err = op_check_userptr(vm, op, pt_update_ops); - if (err) { - up_read(&vm->userptr.notifier_lock); - break; - } - } - - return err; -} - -struct invalidation_fence { - struct xe_gt_tlb_invalidation_fence base; - struct xe_gt *gt; - struct dma_fence *fence; - struct dma_fence_cb cb; - struct work_struct work; - u64 start; - u64 end; - u32 asid; -}; - -static void invalidation_fence_cb(struct dma_fence *fence, - struct dma_fence_cb *cb) -{ - struct invalidation_fence *ifence = - container_of(cb, struct invalidation_fence, cb); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base); - if (!ifence->fence->error) { - queue_work(system_wq, &ifence->work); - } else { - ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); - } - dma_fence_put(ifence->fence); -} - -static void invalidation_fence_work_func(struct work_struct *w) -{ - struct invalidation_fence *ifence = - container_of(w, struct invalidation_fence, work); - struct xe_device *xe = gt_to_xe(ifence->gt); - - trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base); - xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start, - ifence->end, ifence->asid); -} - -static int invalidation_fence_init(struct xe_gt *gt, - struct invalidation_fence *ifence, - struct dma_fence *fence, - u64 start, u64 end, u32 asid) -{ - int ret; - - trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - - xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - - ifence->fence = fence; - ifence->gt = gt; - ifence->start = start; - ifence->end = end; - ifence->asid = asid; - - INIT_WORK(&ifence->work, invalidation_fence_work_func); - ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb); - if (ret == -ENOENT) { - dma_fence_put(ifence->fence); /* Usually dropped in CB */ - invalidation_fence_work_func(&ifence->work); - } else if (ret) { - dma_fence_put(&ifence->base.base); /* Caller ref */ - dma_fence_put(&ifence->base.base); /* Creation ref */ - } - - xe_gt_assert(gt, !ret || ret == -ENOENT); - - return ret && ret != -ENOENT ? ret : 0; -} - -struct xe_pt_stage_unbind_walk { - /** @base: The pagewalk base-class. */ - struct xe_pt_walk base; - - /* Input parameters for the walk */ - /** @tile: The tile we're unbinding from. */ - struct xe_tile *tile; - - /** - * @modified_start: Walk range start, modified to include any - * shared pagetables that we're the only user of and can thus - * treat as private. - */ - u64 modified_start; - /** @modified_end: Walk range start, modified like @modified_start. */ - u64 modified_end; - - /* Output */ - /* @wupd: Structure to track the page-table updates we're building */ - struct xe_walk_update wupd; -}; - -/* - * Check whether this range is the only one populating this pagetable, - * and in that case, update the walk range checks so that higher levels don't - * view us as a shared pagetable. - */ -static bool xe_pt_check_kill(u64 addr, u64 next, unsigned int level, - const struct xe_pt *child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - unsigned int shift = walk->shifts[level]; - u64 size = 1ull << shift; - - if (IS_ALIGNED(addr, size) && IS_ALIGNED(next, size) && - ((next - addr) >> shift) == child->num_live) { - u64 size = 1ull << walk->shifts[level + 1]; - - *action = ACTION_CONTINUE; - - if (xe_walk->modified_start >= addr) - xe_walk->modified_start = round_down(addr, size); - if (xe_walk->modified_end <= next) - xe_walk->modified_end = round_up(next, size); - - return true; - } - - return false; -} - -static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - - XE_WARN_ON(!*child); - XE_WARN_ON(!level); - - xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk); - - return 0; -} - -static int -xe_pt_stage_unbind_post_descend(struct xe_ptw *parent, pgoff_t offset, - unsigned int level, u64 addr, u64 next, - struct xe_ptw **child, - enum page_walk_action *action, - struct xe_pt_walk *walk) -{ - struct xe_pt_stage_unbind_walk *xe_walk = - container_of(walk, typeof(*xe_walk), base); - struct xe_pt *xe_child = container_of(*child, typeof(*xe_child), base); - pgoff_t end_offset; - u64 size = 1ull << walk->shifts[--level]; - - if (!IS_ALIGNED(addr, size)) - addr = xe_walk->modified_start; - if (!IS_ALIGNED(next, size)) - next = xe_walk->modified_end; - - /* Parent == *child is the root pt. Don't kill it. */ - if (parent != *child && - xe_pt_check_kill(addr, next, level, xe_child, action, walk)) - return 0; - - if (!xe_pt_nonshared_offsets(addr, next, level, walk, action, &offset, - &end_offset)) - return 0; - - (void)xe_pt_new_shared(&xe_walk->wupd, xe_child, offset, false); - xe_walk->wupd.updates[level].update->qwords = end_offset - offset; - - return 0; -} - -static const struct xe_pt_walk_ops xe_pt_stage_unbind_ops = { - .pt_entry = xe_pt_stage_unbind_entry, - .pt_post_descend = xe_pt_stage_unbind_post_descend, -}; - -/** - * xe_pt_stage_unbind() - Build page-table update structures for an unbind - * operation - * @tile: The tile we're unbinding for. - * @vma: The vma we're unbinding. - * @entries: Caller-provided storage for the update structures. - * - * Builds page-table update structures for an unbind operation. The function - * will attempt to remove all page-tables that we're the only user - * of, and for that to work, the unbind operation must be committed in the - * same critical section that blocks racing binds to the same page-table tree. - * - * Return: The number of entries used. - */ -static unsigned int xe_pt_stage_unbind(struct xe_tile *tile, struct xe_vma *vma, - struct xe_vm_pgtable_update *entries) -{ - struct xe_pt_stage_unbind_walk xe_walk = { - .base = { - .ops = &xe_pt_stage_unbind_ops, - .shifts = xe_normal_pt_shifts, - .max_level = XE_PT_HIGHEST_LEVEL, - }, - .tile = tile, - .modified_start = xe_vma_start(vma), - .modified_end = xe_vma_end(vma), - .wupd.entries = entries, - }; - struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id]; - - (void)xe_pt_walk_shared(&pt->base, pt->level, xe_vma_start(vma), - xe_vma_end(vma), &xe_walk.base); - - return xe_walk.wupd.num_used_entries; -} - -static void -xe_migrate_clear_pgtable_callback(struct xe_migrate_pt_update *pt_update, - struct xe_tile *tile, struct iosys_map *map, - void *ptr, u32 qword_ofs, u32 num_qwords, - const struct xe_vm_pgtable_update *update) -{ - struct xe_vm *vm = pt_update->vops->vm; - u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level); - int i; - - if (map && map->is_iomem) - for (i = 0; i < num_qwords; ++i) - xe_map_wr(tile_to_xe(tile), map, (qword_ofs + i) * - sizeof(u64), u64, empty); - else if (map) - memset64(map->vaddr + qword_ofs * sizeof(u64), empty, - num_qwords); - else - memset64(ptr, empty, num_qwords); -} - -static void -xe_pt_commit_unbind(struct xe_vma *vma, - struct xe_vm_pgtable_update *entries, u32 num_entries, - struct llist_head *deferred) -{ - u32 j; - - xe_pt_commit_locks_assert(vma); - - for (j = 0; j < num_entries; ++j) { - struct xe_vm_pgtable_update *entry = &entries[j]; - struct xe_pt *pt = entry->pt; - - pt->num_live -= entry->qwords; - if (pt->level) { - struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt); - u32 i; - - for (i = entry->ofs; i < entry->ofs + entry->qwords; - i++) { - if (xe_pt_entry(pt_dir, i)) - xe_pt_destroy(xe_pt_entry(pt_dir, i), - xe_vma_vm(vma)->flags, deferred); - - pt_dir->children[i] = NULL; - } - } - } -} - -static void -xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - int i, level = 0; - u64 start, last; - - for (i = 0; i < pt_op->num_entries; i++) { - const struct xe_vm_pgtable_update *entry = &pt_op->entries[i]; - - if (entry->pt->level > level) - level = entry->pt->level; - } - - /* Greedy (non-optimal) calculation but simple */ - start = ALIGN_DOWN(xe_vma_start(vma), 0x1ull << xe_pt_shift(level)); - last = ALIGN(xe_vma_end(vma), 0x1ull << xe_pt_shift(level)) - 1; - - if (start < pt_update_ops->start) - pt_update_ops->start = start; - if (last > pt_update_ops->last) - pt_update_ops->last = last; -} - -static int vma_reserve_fences(struct xe_device *xe, struct xe_vma *vma) -{ - int shift = xe_device_get_root_tile(xe)->media_gt ? 1 : 0; - - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) - return dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, - xe->info.tile_count << shift); - - return 0; -} - -static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - struct llist_head *deferred = &pt_update_ops->deferred; - int err; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing bind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - pt_op->vma = NULL; - pt_op->bind = true; - pt_op->rebind = BIT(tile->id) & vma->tile_present; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - err = xe_pt_prepare_bind(tile, vma, pt_op->entries, - &pt_op->num_entries); - if (!err) { - xe_tile_assert(tile, pt_op->num_entries <= - ARRAY_SIZE(pt_op->entries)); - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, true); - - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - - /* - * If rebind, we have to invalidate TLB on !LR vms to invalidate - * cached PTEs point to freed memory. On LR vms this is done - * automatically when the context is re-enabled by the rebind worker, - * or in fault mode it was invalidated on PTE zapping. - * - * If !rebind, and scratch enabled VMs, there is a chance the scratch - * PTE is already cached in the TLB so it needs to be invalidated. - * On !LR VMs this is done in the ring ops preceding a batch, but on - * non-faulting LR, in particular on user-space batch buffer chaining, - * it needs to be done here. - */ - if ((!pt_op->rebind && xe_vm_has_scratch(vm) && - xe_vm_in_preempt_fence_mode(vm))) - pt_update_ops->needs_invalidation = true; - else if (pt_op->rebind && !xe_vm_in_lr_mode(vm)) - /* We bump also if batch_invalidate_tlb is true */ - vm->tlb_flush_seqno++; - - /* FIXME: Don't commit right away */ - vma->tile_staged |= BIT(tile->id); - pt_op->vma = vma; - xe_pt_commit_bind(vma, pt_op->entries, pt_op->num_entries, - pt_op->rebind, deferred); - } - - return err; -} - -static int unbind_op_prepare(struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma) -{ - u32 current_op = pt_update_ops->current_op; - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[current_op]; - struct llist_head *deferred = &pt_update_ops->deferred; - int err; - - if (!((vma->tile_present | vma->tile_staged) & BIT(tile->id))) - return 0; - - xe_bo_assert_held(xe_vma_bo(vma)); - - vm_dbg(&xe_vma_vm(vma)->xe->drm, - "Preparing unbind, with range [%llx...%llx)\n", - xe_vma_start(vma), xe_vma_end(vma) - 1); - - /* - * Wait for invalidation to complete. Can corrupt internal page table - * state if an invalidation is running while preparing an unbind. - */ - if (xe_vma_is_userptr(vma) && xe_vm_in_fault_mode(xe_vma_vm(vma))) - mmu_interval_read_begin(&to_userptr_vma(vma)->userptr.notifier); - - pt_op->vma = vma; - pt_op->bind = false; - pt_op->rebind = false; - - err = vma_reserve_fences(tile_to_xe(tile), vma); - if (err) - return err; - - pt_op->num_entries = xe_pt_stage_unbind(tile, vma, pt_op->entries); - - xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries, - pt_op->num_entries, false); - xe_pt_update_ops_rfence_interval(pt_update_ops, vma); - ++pt_update_ops->current_op; - pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma); - pt_update_ops->needs_invalidation = true; - - /* FIXME: Don't commit right away */ - xe_pt_commit_unbind(vma, pt_op->entries, pt_op->num_entries, - deferred); - - return 0; -} - -static int op_prepare(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op) -{ - int err = 0; - - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma); - pt_update_ops->wait_vm_kernel = true; - break; - case DRM_GPUVA_OP_REMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va)); - - if (!err && op->remap.prev) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.prev); - pt_update_ops->wait_vm_bookkeep = true; - } - if (!err && op->remap.next) { - err = bind_op_prepare(vm, tile, pt_update_ops, - op->remap.next); - pt_update_ops->wait_vm_bookkeep = true; - } - break; - case DRM_GPUVA_OP_UNMAP: - err = unbind_op_prepare(tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va)); - break; - case DRM_GPUVA_OP_PREFETCH: - err = bind_op_prepare(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va)); - pt_update_ops->wait_vm_kernel = true; - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } - - return err; -} - -static void -xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops *pt_update_ops) -{ - init_llist_head(&pt_update_ops->deferred); - pt_update_ops->start = ~0x0ull; - pt_update_ops->last = 0x0ull; -} - -/** - * xe_pt_update_ops_prepare() - Prepare PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Prepare PT update operations which includes updating internal PT state, - * allocate memory for page tables, populate page table being pruned in, and - * create PT update operations for leaf insertion / removal. - * - * Return: 0 on success, negative error code on error. - */ -int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct xe_vma_op *op; - int shift = tile->media_gt ? 1 : 0; - int err; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - xe_pt_update_ops_init(pt_update_ops); - - err = dma_resv_reserve_fences(xe_vm_resv(vops->vm), - tile_to_xe(tile)->info.tile_count << shift); - if (err) - return err; - - list_for_each_entry(op, &vops->list, link) { - err = op_prepare(vops->vm, tile, pt_update_ops, op); - - if (err) - return err; - } - - xe_tile_assert(tile, pt_update_ops->current_op <= - pt_update_ops->num_ops); - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vops->vm->xe->vm_inject_error_position == FORCE_OP_ERROR_PREPARE) - return -ENOSPC; -#endif - - return 0; -} - -static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present |= BIT(tile->id); - vma->tile_staged &= ~BIT(tile->id); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - to_userptr_vma(vma)->userptr.initial_bind = true; - } - - /* - * Kick rebind worker if this bind triggers preempt fences and not in - * the rebind worker - */ - if (pt_update_ops->wait_vm_bookkeep && - xe_vm_in_preempt_fence_mode(vm) && - !current->mm) - xe_vm_queue_rebind_worker(vm); -} - -static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma *vma, struct dma_fence *fence, - struct dma_fence *fence2) -{ - if (!xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm) { - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - if (fence2) - dma_resv_add_fence(xe_vma_bo(vma)->ttm.base.resv, fence2, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - } - vma->tile_present &= ~BIT(tile->id); - if (!vma->tile_present) { - list_del_init(&vma->combined_links.rebind); - if (xe_vma_is_userptr(vma)) { - lockdep_assert_held_read(&vm->userptr.notifier_lock); - - spin_lock(&vm->userptr.invalidated_lock); - list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link); - spin_unlock(&vm->userptr.invalidated_lock); - } - } -} - -static void op_commit(struct xe_vm *vm, - struct xe_tile *tile, - struct xe_vm_pgtable_update_ops *pt_update_ops, - struct xe_vma_op *op, struct dma_fence *fence, - struct dma_fence *fence2) -{ - xe_vm_assert_held(vm); - - switch (op->base.op) { - case DRM_GPUVA_OP_MAP: - if (!op->map.immediate && xe_vm_in_fault_mode(vm)) - break; - - bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, - fence2); - break; - case DRM_GPUVA_OP_REMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.remap.unmap->va), fence, - fence2); - - if (op->remap.prev) - bind_op_commit(vm, tile, pt_update_ops, op->remap.prev, - fence, fence2); - if (op->remap.next) - bind_op_commit(vm, tile, pt_update_ops, op->remap.next, - fence, fence2); - break; - case DRM_GPUVA_OP_UNMAP: - unbind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.unmap.va), fence, fence2); - break; - case DRM_GPUVA_OP_PREFETCH: - bind_op_commit(vm, tile, pt_update_ops, - gpuva_to_vma(op->base.prefetch.va), fence, fence2); - break; - default: - drm_warn(&vm->xe->drm, "NOT POSSIBLE"); - } -} - -static const struct xe_migrate_pt_update_ops migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_pre_commit, -}; - -static const struct xe_migrate_pt_update_ops userptr_migrate_ops = { - .populate = xe_vm_populate_pgtable, - .clear = xe_migrate_clear_pgtable_callback, - .pre_commit = xe_pt_userptr_pre_commit, -}; - -/** - * xe_pt_update_ops_run() - Run PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Run PT update operations which includes committing internal PT state changes, - * creating job for PT update operations for leaf insertion / removal, and - * installing job fence in various places. - * - * Return: fence on success, negative ERR_PTR on error. - */ -struct dma_fence * -xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm *vm = vops->vm; - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - struct dma_fence *fence; - struct invalidation_fence *ifence = NULL, *mfence = NULL; - struct dma_fence **fences = NULL; - struct dma_fence_array *cf = NULL; - struct xe_range_fence *rfence; - struct xe_vma_op *op; - int err = 0; - struct xe_migrate_pt_update update = { - .ops = pt_update_ops->needs_userptr_lock ? - &userptr_migrate_ops : - &migrate_ops, - .vops = vops, - .tile_id = tile->id, - }; - - lockdep_assert_held(&vm->lock); - xe_vm_assert_held(vm); - - if (!pt_update_ops->current_op) { - xe_tile_assert(tile, xe_vm_in_fault_mode(vm)); - - return dma_fence_get_stub(); - } - -#ifdef TEST_VM_OPS_ERROR - if (vops->inject_error && - vm->xe->vm_inject_error_position == FORCE_OP_ERROR_RUN) - return ERR_PTR(-ENOSPC); -#endif - - if (pt_update_ops->needs_invalidation) { - ifence = kzalloc(sizeof(*ifence), GFP_KERNEL); -<<<<<<< - if (!ifence) - return ERR_PTR(-ENOMEM); -======= - if (!ifence) { - err = -ENOMEM; - goto kill_vm_tile1; - } - if (tile->media_gt) { - mfence = kzalloc(sizeof(*ifence), GFP_KERNEL); - if (!mfence) { - err = -ENOMEM; - goto free_ifence; - } - fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL); - if (!fences) { - err = -ENOMEM; - goto free_ifence; - } - cf = dma_fence_array_alloc(2); - if (!cf) { - err = -ENOMEM; - goto free_ifence; - } - } ->>>>>>> - } - - rfence = kzalloc(sizeof(*rfence), GFP_KERNEL); - if (!rfence) { - err = -ENOMEM; - goto free_ifence; - } - - fence = xe_migrate_update_pgtables(tile->migrate, &update); - if (IS_ERR(fence)) { - err = PTR_ERR(fence); - goto free_rfence; - } - - if (xe_range_fence_insert(&vm->rftree[tile->id], rfence, - &xe_range_fence_kfree_ops, - pt_update_ops->start, - pt_update_ops->last, fence)) - dma_fence_wait(fence, false); - - /* tlb invalidation must be done before signaling rebind */ - if (ifence) { -<<<<<<< - err = invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, - vm->usm.asid); - if (err) - goto put_fence; - fence = &ifence->base.base; -======= - if (mfence) - dma_fence_get(fence); - invalidation_fence_init(tile->primary_gt, ifence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - if (mfence) { - invalidation_fence_init(tile->media_gt, mfence, fence, - pt_update_ops->start, - pt_update_ops->last, vm->usm.asid); - fences[0] = &ifence->base.base; - fences[1] = &mfence->base.base; - dma_fence_array_init(cf, 2, fences, - vm->composite_fence_ctx, - vm->composite_fence_seqno++, - false); - fence = &cf->base; - } else { - fence = &ifence->base.base; - } ->>>>>>> - } - - if (!mfence) { - dma_resv_add_fence(xe_vm_resv(vm), fence, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL); - } else { - dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base, - pt_update_ops->wait_vm_bookkeep ? - DMA_RESV_USAGE_KERNEL : - DMA_RESV_USAGE_BOOKKEEP); - - list_for_each_entry(op, &vops->list, link) - op_commit(vops->vm, tile, pt_update_ops, op, - &ifence->base.base, &mfence->base.base); - } - - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); - - return fence; - -put_fence: - if (pt_update_ops->needs_userptr_lock) - up_read(&vm->userptr.notifier_lock); - dma_fence_put(fence); -free_rfence: - kfree(rfence); -free_ifence: - kfree(cf); - kfree(fences); - kfree(mfence); - kfree(ifence); - - return ERR_PTR(err); -} - -/** - * xe_pt_update_ops_fini() - Finish PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operations - * - * Finish PT update operations by committing to destroy page table memory - */ -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - struct xe_vm_pgtable_update_ops *pt_update_ops = - &vops->pt_update_ops[tile->id]; - int i; - - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - /* FIXME: Not 100% correct */ - for (i = 0; i < pt_update_ops->num_ops; ++i) { - struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops->ops[i]; - - if (pt_op->bind) - xe_pt_free_bind(pt_op->entries, pt_op->num_entries); - } - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); -} - -/** - * xe_pt_update_ops_abort() - Abort PT update operations - * @tile: Tile of PT update operations - * @vops: VMA operationa - * - * Abort PT update operations by unwinding internal PT state - */ -void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops) -{ - lockdep_assert_held(&vops->vm->lock); - xe_vm_assert_held(vops->vm); - - /* FIXME: Just kill VM for now + cleanup PTs */ - xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred); - xe_vm_kill(vops->vm, false); -} diff --git a/rr-cache/df142b0dd2682ec0125aef7b0454a6c2796441e0/preimage b/rr-cache/df142b0dd2682ec0125aef7b0454a6c2796441e0/preimage deleted file mode 100644 index bcb6a7876029..000000000000 --- a/rr-cache/df142b0dd2682ec0125aef7b0454a6c2796441e0/preimage +++ /dev/null @@ -1,892 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * random utility code, for bcache but in theory not specific to bcache - * - * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com> - * Copyright 2012 Google, Inc. - */ - -#include <linux/bio.h> -#include <linux/blkdev.h> -#include <linux/ctype.h> -#include <linux/debugfs.h> -#include <linux/freezer.h> -#include <linux/kthread.h> -#include <linux/log2.h> -#include <linux/math64.h> -#include <linux/percpu.h> -#include <linux/preempt.h> -#include <linux/random.h> -#include <linux/seq_file.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/sched/clock.h> - -#include "eytzinger.h" -#include "mean_and_variance.h" -#include "util.h" - -static const char si_units[] = "?kMGTPEZY"; - -/* string_get_size units: */ -static const char *const units_2[] = { - "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" -}; -static const char *const units_10[] = { - "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB" -}; - -static int parse_u64(const char *cp, u64 *res) -{ - const char *start = cp; - u64 v = 0; - - if (!isdigit(*cp)) - return -EINVAL; - - do { - if (v > U64_MAX / 10) - return -ERANGE; - v *= 10; - if (v > U64_MAX - (*cp - '0')) - return -ERANGE; - v += *cp - '0'; - cp++; - } while (isdigit(*cp)); - - *res = v; - return cp - start; -} - -static int bch2_pow(u64 n, u64 p, u64 *res) -{ - *res = 1; - - while (p--) { - if (*res > div_u64(U64_MAX, n)) - return -ERANGE; - *res *= n; - } - return 0; -} - -static int parse_unit_suffix(const char *cp, u64 *res) -{ - const char *start = cp; - u64 base = 1024; - unsigned u; - int ret; - - if (*cp == ' ') - cp++; - - for (u = 1; u < strlen(si_units); u++) - if (*cp == si_units[u]) { - cp++; - goto got_unit; - } - - for (u = 0; u < ARRAY_SIZE(units_2); u++) - if (!strncmp(cp, units_2[u], strlen(units_2[u]))) { - cp += strlen(units_2[u]); - goto got_unit; - } - - for (u = 0; u < ARRAY_SIZE(units_10); u++) - if (!strncmp(cp, units_10[u], strlen(units_10[u]))) { - cp += strlen(units_10[u]); - base = 1000; - goto got_unit; - } - - *res = 1; - return 0; -got_unit: - ret = bch2_pow(base, u, res); - if (ret) - return ret; - - return cp - start; -} - -#define parse_or_ret(cp, _f) \ -do { \ - int _ret = _f; \ - if (_ret < 0) \ - return _ret; \ - cp += _ret; \ -} while (0) - -static int __bch2_strtou64_h(const char *cp, u64 *res) -{ - const char *start = cp; - u64 v = 0, b, f_n = 0, f_d = 1; - int ret; - - parse_or_ret(cp, parse_u64(cp, &v)); - - if (*cp == '.') { - cp++; - ret = parse_u64(cp, &f_n); - if (ret < 0) - return ret; - cp += ret; - - ret = bch2_pow(10, ret, &f_d); - if (ret) - return ret; - } - - parse_or_ret(cp, parse_unit_suffix(cp, &b)); - - if (v > div_u64(U64_MAX, b)) - return -ERANGE; - v *= b; - - if (f_n > div_u64(U64_MAX, b)) - return -ERANGE; - - f_n = div_u64(f_n * b, f_d); - if (v + f_n < v) - return -ERANGE; - v += f_n; - - *res = v; - return cp - start; -} - -static int __bch2_strtoh(const char *cp, u64 *res, - u64 t_max, bool t_signed) -{ - bool positive = *cp != '-'; - u64 v = 0; - - if (*cp == '+' || *cp == '-') - cp++; - - parse_or_ret(cp, __bch2_strtou64_h(cp, &v)); - - if (*cp == '\n') - cp++; - if (*cp) - return -EINVAL; - - if (positive) { - if (v > t_max) - return -ERANGE; - } else { - if (v && !t_signed) - return -ERANGE; - - if (v > t_max + 1) - return -ERANGE; - v = -v; - } - - *res = v; - return 0; -} - -#define STRTO_H(name, type) \ -int bch2_ ## name ## _h(const char *cp, type *res) \ -{ \ - u64 v = 0; \ - int ret = __bch2_strtoh(cp, &v, ANYSINT_MAX(type), \ - ANYSINT_MAX(type) != ((type) ~0ULL)); \ - *res = v; \ - return ret; \ -} - -STRTO_H(strtoint, int) -STRTO_H(strtouint, unsigned int) -STRTO_H(strtoll, long long) -STRTO_H(strtoull, unsigned long long) -STRTO_H(strtou64, u64) - -u64 bch2_read_flag_list(char *opt, const char * const list[]) -{ - u64 ret = 0; - char *p, *s, *d = kstrdup(opt, GFP_KERNEL); - - if (!d) - return -ENOMEM; - - s = strim(d); - - while ((p = strsep(&s, ","))) { - int flag = match_string(list, -1, p); - - if (flag < 0) { - ret = -1; - break; - } - - ret |= 1 << flag; - } - - kfree(d); - - return ret; -} - -bool bch2_is_zero(const void *_p, size_t n) -{ - const char *p = _p; - size_t i; - - for (i = 0; i < n; i++) - if (p[i]) - return false; - return true; -} - -void bch2_prt_u64_base2_nbits(struct printbuf *out, u64 v, unsigned nr_bits) -{ - while (nr_bits) - prt_char(out, '0' + ((v >> --nr_bits) & 1)); -} - -void bch2_prt_u64_base2(struct printbuf *out, u64 v) -{ - bch2_prt_u64_base2_nbits(out, v, fls64(v) ?: 1); -} - -static void __bch2_print_string_as_lines(const char *prefix, const char *lines, - bool nonblocking) -{ - bool locked = false; - const char *p; - - if (!lines) { - printk("%s (null)\n", prefix); - return; - } - -<<<<<<< -======= - if (!nonblocking) { - console_lock(); - locked = true; - } else { - locked = console_trylock(); - } - ->>>>>>> - while (1) { - p = strchrnul(lines, '\n'); - printk("%s%.*s\n", prefix, (int) (p - lines), lines); - if (!*p) - break; - lines = p + 1; - } -<<<<<<< -======= - if (locked) - console_unlock(); -} - -void bch2_print_string_as_lines(const char *prefix, const char *lines) -{ - return __bch2_print_string_as_lines(prefix, lines, false); -} - -void bch2_print_string_as_lines_nonblocking(const char *prefix, const char *lines) -{ - return __bch2_print_string_as_lines(prefix, lines, true); ->>>>>>> -} - -int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigned skipnr, - gfp_t gfp) -{ -#ifdef CONFIG_STACKTRACE - unsigned nr_entries = 0; - - stack->nr = 0; - int ret = darray_make_room_gfp(stack, 32, gfp); - if (ret) - return ret; - - if (!down_read_trylock(&task->signal->exec_update_lock)) - return -1; - - do { - nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1); - } while (nr_entries == stack->size && - !(ret = darray_make_room_gfp(stack, stack->size * 2, gfp))); - - stack->nr = nr_entries; - up_read(&task->signal->exec_update_lock); - - return ret; -#else - return 0; -#endif -} - -void bch2_prt_backtrace(struct printbuf *out, bch_stacktrace *stack) -{ - darray_for_each(*stack, i) { - prt_printf(out, "[<0>] %pB", (void *) *i); - prt_newline(out); - } -} - -int bch2_prt_task_backtrace(struct printbuf *out, struct task_struct *task, unsigned skipnr, gfp_t gfp) -{ - bch_stacktrace stack = { 0 }; - int ret = bch2_save_backtrace(&stack, task, skipnr + 1, gfp); - - bch2_prt_backtrace(out, &stack); - darray_exit(&stack); - return ret; -} - -#ifndef __KERNEL__ -#include <time.h> -void bch2_prt_datetime(struct printbuf *out, time64_t sec) -{ - time_t t = sec; - char buf[64]; - ctime_r(&t, buf); - strim(buf); - prt_str(out, buf); -} -#else -void bch2_prt_datetime(struct printbuf *out, time64_t sec) -{ - char buf[64]; - snprintf(buf, sizeof(buf), "%ptT", &sec); - prt_u64(out, sec); -} -#endif - -void bch2_pr_time_units(struct printbuf *out, u64 ns) -{ - const struct time_unit *u = bch2_pick_time_units(ns); - - prt_printf(out, "%llu %s", div_u64(ns, u->nsecs), u->name); -} - -static void bch2_pr_time_units_aligned(struct printbuf *out, u64 ns) -{ - const struct time_unit *u = bch2_pick_time_units(ns); - - prt_printf(out, "%llu \r%s", div64_u64(ns, u->nsecs), u->name); -} - -static inline void pr_name_and_units(struct printbuf *out, const char *name, u64 ns) -{ - prt_printf(out, "%s\t", name); - bch2_pr_time_units_aligned(out, ns); - prt_newline(out); -} - -#define TABSTOP_SIZE 12 - -void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats) -{ - struct quantiles *quantiles = time_stats_to_quantiles(stats); - s64 f_mean = 0, d_mean = 0; - u64 f_stddev = 0, d_stddev = 0; - - if (stats->buffer) { - int cpu; - - spin_lock_irq(&stats->lock); - for_each_possible_cpu(cpu) - __bch2_time_stats_clear_buffer(stats, per_cpu_ptr(stats->buffer, cpu)); - spin_unlock_irq(&stats->lock); - } - - /* - * avoid divide by zero - */ - if (stats->freq_stats.n) { - f_mean = mean_and_variance_get_mean(stats->freq_stats); - f_stddev = mean_and_variance_get_stddev(stats->freq_stats); - d_mean = mean_and_variance_get_mean(stats->duration_stats); - d_stddev = mean_and_variance_get_stddev(stats->duration_stats); - } - - printbuf_tabstop_push(out, out->indent + TABSTOP_SIZE); - prt_printf(out, "count:\t%llu\n", stats->duration_stats.n); - printbuf_tabstop_pop(out); - - printbuf_tabstops_reset(out); - - printbuf_tabstop_push(out, out->indent + 20); - printbuf_tabstop_push(out, TABSTOP_SIZE + 2); - printbuf_tabstop_push(out, 0); - printbuf_tabstop_push(out, TABSTOP_SIZE + 2); - - prt_printf(out, "\tsince mount\r\trecent\r\n"); - - printbuf_tabstops_reset(out); - printbuf_tabstop_push(out, out->indent + 20); - printbuf_tabstop_push(out, TABSTOP_SIZE); - printbuf_tabstop_push(out, 2); - printbuf_tabstop_push(out, TABSTOP_SIZE); - - prt_printf(out, "duration of events\n"); - printbuf_indent_add(out, 2); - - pr_name_and_units(out, "min:", stats->min_duration); - pr_name_and_units(out, "max:", stats->max_duration); - pr_name_and_units(out, "total:", stats->total_duration); - - prt_printf(out, "mean:\t"); - bch2_pr_time_units_aligned(out, d_mean); - prt_tab(out); - bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT)); - prt_newline(out); - - prt_printf(out, "stddev:\t"); - bch2_pr_time_units_aligned(out, d_stddev); - prt_tab(out); - bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->duration_stats_weighted, TIME_STATS_MV_WEIGHT)); - - printbuf_indent_sub(out, 2); - prt_newline(out); - - prt_printf(out, "time between events\n"); - printbuf_indent_add(out, 2); - - pr_name_and_units(out, "min:", stats->min_freq); - pr_name_and_units(out, "max:", stats->max_freq); - - prt_printf(out, "mean:\t"); - bch2_pr_time_units_aligned(out, f_mean); - prt_tab(out); - bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_mean(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT)); - prt_newline(out); - - prt_printf(out, "stddev:\t"); - bch2_pr_time_units_aligned(out, f_stddev); - prt_tab(out); - bch2_pr_time_units_aligned(out, mean_and_variance_weighted_get_stddev(stats->freq_stats_weighted, TIME_STATS_MV_WEIGHT)); - - printbuf_indent_sub(out, 2); - prt_newline(out); - - printbuf_tabstops_reset(out); - - if (quantiles) { - int i = eytzinger0_first(NR_QUANTILES); - const struct time_unit *u = - bch2_pick_time_units(quantiles->entries[i].m); - u64 last_q = 0; - - prt_printf(out, "quantiles (%s):\t", u->name); - eytzinger0_for_each(i, NR_QUANTILES) { - bool is_last = eytzinger0_next(i, NR_QUANTILES) == -1; - - u64 q = max(quantiles->entries[i].m, last_q); - prt_printf(out, "%llu ", div_u64(q, u->nsecs)); - if (is_last) - prt_newline(out); - last_q = q; - } - } -} - -/* ratelimit: */ - -/** - * bch2_ratelimit_delay() - return how long to delay until the next time to do - * some work - * @d: the struct bch_ratelimit to update - * Returns: the amount of time to delay by, in jiffies - */ -u64 bch2_ratelimit_delay(struct bch_ratelimit *d) -{ - u64 now = local_clock(); - - return time_after64(d->next, now) - ? nsecs_to_jiffies(d->next - now) - : 0; -} - -/** - * bch2_ratelimit_increment() - increment @d by the amount of work done - * @d: the struct bch_ratelimit to update - * @done: the amount of work done, in arbitrary units - */ -void bch2_ratelimit_increment(struct bch_ratelimit *d, u64 done) -{ - u64 now = local_clock(); - - d->next += div_u64(done * NSEC_PER_SEC, d->rate); - - if (time_before64(now + NSEC_PER_SEC, d->next)) - d->next = now + NSEC_PER_SEC; - - if (time_after64(now - NSEC_PER_SEC * 2, d->next)) - d->next = now - NSEC_PER_SEC * 2; -} - -/* pd controller: */ - -/* - * Updates pd_controller. Attempts to scale inputed values to units per second. - * @target: desired value - * @actual: current value - * - * @sign: 1 or -1; 1 if increasing the rate makes actual go up, -1 if increasing - * it makes actual go down. - */ -void bch2_pd_controller_update(struct bch_pd_controller *pd, - s64 target, s64 actual, int sign) -{ - s64 proportional, derivative, change; - - unsigned long seconds_since_update = (jiffies - pd->last_update) / HZ; - - if (seconds_since_update == 0) - return; - - pd->last_update = jiffies; - - proportional = actual - target; - proportional *= seconds_since_update; - proportional = div_s64(proportional, pd->p_term_inverse); - - derivative = actual - pd->last_actual; - derivative = div_s64(derivative, seconds_since_update); - derivative = ewma_add(pd->smoothed_derivative, derivative, - (pd->d_term / seconds_since_update) ?: 1); - derivative = derivative * pd->d_term; - derivative = div_s64(derivative, pd->p_term_inverse); - - change = proportional + derivative; - - /* Don't increase rate if not keeping up */ - if (change > 0 && - pd->backpressure && - time_after64(local_clock(), - pd->rate.next + NSEC_PER_MSEC)) - change = 0; - - change *= (sign * -1); - - pd->rate.rate = clamp_t(s64, (s64) pd->rate.rate + change, - 1, UINT_MAX); - - pd->last_actual = actual; - pd->last_derivative = derivative; - pd->last_proportional = proportional; - pd->last_change = change; - pd->last_target = target; -} - -void bch2_pd_controller_init(struct bch_pd_controller *pd) -{ - pd->rate.rate = 1024; - pd->last_update = jiffies; - pd->p_term_inverse = 6000; - pd->d_term = 30; - pd->d_smooth = pd->d_term; - pd->backpressure = 1; -} - -void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd) -{ - if (!out->nr_tabstops) - printbuf_tabstop_push(out, 20); - - prt_printf(out, "rate:\t"); - prt_human_readable_s64(out, pd->rate.rate); - prt_newline(out); - - prt_printf(out, "target:\t"); - prt_human_readable_u64(out, pd->last_target); - prt_newline(out); - - prt_printf(out, "actual:\t"); - prt_human_readable_u64(out, pd->last_actual); - prt_newline(out); - - prt_printf(out, "proportional:\t"); - prt_human_readable_s64(out, pd->last_proportional); - prt_newline(out); - - prt_printf(out, "derivative:\t"); - prt_human_readable_s64(out, pd->last_derivative); - prt_newline(out); - - prt_printf(out, "change:\t"); - prt_human_readable_s64(out, pd->last_change); - prt_newline(out); - - prt_printf(out, "next io:\t%llims\n", div64_s64(pd->rate.next - local_clock(), NSEC_PER_MSEC)); -} - -/* misc: */ - -void bch2_bio_map(struct bio *bio, void *base, size_t size) -{ - while (size) { - struct page *page = is_vmalloc_addr(base) - ? vmalloc_to_page(base) - : virt_to_page(base); - unsigned offset = offset_in_page(base); - unsigned len = min_t(size_t, PAGE_SIZE - offset, size); - - BUG_ON(!bio_add_page(bio, page, len, offset)); - size -= len; - base += len; - } -} - -int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask) -{ - while (size) { - struct page *page = alloc_pages(gfp_mask, 0); - unsigned len = min_t(size_t, PAGE_SIZE, size); - - if (!page) - return -ENOMEM; - - if (unlikely(!bio_add_page(bio, page, len, 0))) { - __free_page(page); - break; - } - - size -= len; - } - - return 0; -} - -size_t bch2_rand_range(size_t max) -{ - size_t rand; - - if (!max) - return 0; - - do { - rand = get_random_long(); - rand &= roundup_pow_of_two(max) - 1; - } while (rand >= max); - - return rand; -} - -void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src) -{ - struct bio_vec bv; - struct bvec_iter iter; - - __bio_for_each_segment(bv, dst, iter, dst_iter) { - void *dstp = kmap_local_page(bv.bv_page); - - memcpy(dstp + bv.bv_offset, src, bv.bv_len); - kunmap_local(dstp); - - src += bv.bv_len; - } -} - -void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) -{ - struct bio_vec bv; - struct bvec_iter iter; - - __bio_for_each_segment(bv, src, iter, src_iter) { - void *srcp = kmap_local_page(bv.bv_page); - - memcpy(dst, srcp + bv.bv_offset, bv.bv_len); - kunmap_local(srcp); - - dst += bv.bv_len; - } -} - -#if 0 -void eytzinger1_test(void) -{ - unsigned inorder, eytz, size; - - pr_info("1 based eytzinger test:"); - - for (size = 2; - size < 65536; - size++) { - unsigned extra = eytzinger1_extra(size); - - if (!(size % 4096)) - pr_info("tree size %u", size); - - BUG_ON(eytzinger1_prev(0, size) != eytzinger1_last(size)); - BUG_ON(eytzinger1_next(0, size) != eytzinger1_first(size)); - - BUG_ON(eytzinger1_prev(eytzinger1_first(size), size) != 0); - BUG_ON(eytzinger1_next(eytzinger1_last(size), size) != 0); - - inorder = 1; - eytzinger1_for_each(eytz, size) { - BUG_ON(__inorder_to_eytzinger1(inorder, size, extra) != eytz); - BUG_ON(__eytzinger1_to_inorder(eytz, size, extra) != inorder); - BUG_ON(eytz != eytzinger1_last(size) && - eytzinger1_prev(eytzinger1_next(eytz, size), size) != eytz); - - inorder++; - } - } -} - -void eytzinger0_test(void) -{ - - unsigned inorder, eytz, size; - - pr_info("0 based eytzinger test:"); - - for (size = 1; - size < 65536; - size++) { - unsigned extra = eytzinger0_extra(size); - - if (!(size % 4096)) - pr_info("tree size %u", size); - - BUG_ON(eytzinger0_prev(-1, size) != eytzinger0_last(size)); - BUG_ON(eytzinger0_next(-1, size) != eytzinger0_first(size)); - - BUG_ON(eytzinger0_prev(eytzinger0_first(size), size) != -1); - BUG_ON(eytzinger0_next(eytzinger0_last(size), size) != -1); - - inorder = 0; - eytzinger0_for_each(eytz, size) { - BUG_ON(__inorder_to_eytzinger0(inorder, size, extra) != eytz); - BUG_ON(__eytzinger0_to_inorder(eytz, size, extra) != inorder); - BUG_ON(eytz != eytzinger0_last(size) && - eytzinger0_prev(eytzinger0_next(eytz, size), size) != eytz); - - inorder++; - } - } -} - -static inline int cmp_u16(const void *_l, const void *_r, size_t size) -{ - const u16 *l = _l, *r = _r; - - return (*l > *r) - (*r - *l); -} - -static void eytzinger0_find_test_val(u16 *test_array, unsigned nr, u16 search) -{ - int i, c1 = -1, c2 = -1; - ssize_t r; - - r = eytzinger0_find_le(test_array, nr, - sizeof(test_array[0]), - cmp_u16, &search); - if (r >= 0) - c1 = test_array[r]; - - for (i = 0; i < nr; i++) - if (test_array[i] <= search && test_array[i] > c2) - c2 = test_array[i]; - - if (c1 != c2) { - eytzinger0_for_each(i, nr) - pr_info("[%3u] = %12u", i, test_array[i]); - pr_info("find_le(%2u) -> [%2zi] = %2i should be %2i", - i, r, c1, c2); - } -} - -void eytzinger0_find_test(void) -{ - unsigned i, nr, allocated = 1 << 12; - u16 *test_array = kmalloc_array(allocated, sizeof(test_array[0]), GFP_KERNEL); - - for (nr = 1; nr < allocated; nr++) { - pr_info("testing %u elems", nr); - - get_random_bytes(test_array, nr * sizeof(test_array[0])); - eytzinger0_sort(test_array, nr, sizeof(test_array[0]), cmp_u16, NULL); - - /* verify array is sorted correctly: */ - eytzinger0_for_each(i, nr) - BUG_ON(i != eytzinger0_last(nr) && - test_array[i] > test_array[eytzinger0_next(i, nr)]); - - for (i = 0; i < U16_MAX; i += 1 << 12) - eytzinger0_find_test_val(test_array, nr, i); - - for (i = 0; i < nr; i++) { - eytzinger0_find_test_val(test_array, nr, test_array[i] - 1); - eytzinger0_find_test_val(test_array, nr, test_array[i]); - eytzinger0_find_test_val(test_array, nr, test_array[i] + 1); - } - } - - kfree(test_array); -} -#endif - -/* - * Accumulate percpu counters onto one cpu's copy - only valid when access - * against any percpu counter is guarded against - */ -u64 *bch2_acc_percpu_u64s(u64 __percpu *p, unsigned nr) -{ - u64 *ret; - int cpu; - - /* access to pcpu vars has to be blocked by other locking */ - preempt_disable(); - ret = this_cpu_ptr(p); - preempt_enable(); - - for_each_possible_cpu(cpu) { - u64 *i = per_cpu_ptr(p, cpu); - - if (i != ret) { - acc_u64s(ret, i, nr); - memset(i, 0, nr * sizeof(u64)); - } - } - - return ret; -} - -void bch2_darray_str_exit(darray_str *d) -{ - darray_for_each(*d, i) - kfree(*i); - darray_exit(d); -} - -int bch2_split_devs(const char *_dev_name, darray_str *ret) -{ - darray_init(ret); - - char *dev_name, *s, *orig; - - dev_name = orig = kstrdup(_dev_name, GFP_KERNEL); - if (!dev_name) - return -ENOMEM; - - while ((s = strsep(&dev_name, ":"))) { - char *p = kstrdup(s, GFP_KERNEL); - if (!p) - goto err; - - if (darray_push(ret, p)) { - kfree(p); - goto err; - } - } - - kfree(orig); - return 0; -err: - bch2_darray_str_exit(ret); - kfree(orig); - return -ENOMEM; -} diff --git a/rr-cache/e7c9aafc2297a37f89715cfeed48ccbfb82f76bb/preimage.8 b/rr-cache/e7c9aafc2297a37f89715cfeed48ccbfb82f76bb/preimage.8 deleted file mode 100644 index 110e70f7ee7b..000000000000 --- a/rr-cache/e7c9aafc2297a37f89715cfeed48ccbfb82f76bb/preimage.8 +++ /dev/null @@ -1,2244 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2022 Intel Corporation - */ - -#include "xe_guc_submit.h" - -#include <linux/bitfield.h> -#include <linux/bitmap.h> -#include <linux/circ_buf.h> -#include <linux/delay.h> -#include <linux/dma-fence-array.h> -#include <linux/math64.h> - -#include <drm/drm_managed.h> - -#include "abi/guc_actions_abi.h" -#include "abi/guc_klvs_abi.h" -#include "regs/xe_lrc_layout.h" -#include "xe_assert.h" -#include "xe_devcoredump.h" -#include "xe_device.h" -#include "xe_exec_queue.h" -#include "xe_force_wake.h" -#include "xe_gpu_scheduler.h" -#include "xe_gt.h" -#include "xe_gt_clock.h" -#include "xe_gt_printk.h" -#include "xe_guc.h" -#include "xe_guc_ct.h" -#include "xe_guc_exec_queue_types.h" -#include "xe_guc_id_mgr.h" -#include "xe_guc_submit_types.h" -#include "xe_hw_engine.h" -#include "xe_hw_fence.h" -#include "xe_lrc.h" -#include "xe_macros.h" -#include "xe_map.h" -#include "xe_mocs.h" -#include "xe_pm.h" -#include "xe_ring_ops_types.h" -#include "xe_sched_job.h" -#include "xe_trace.h" -#include "xe_vm.h" - -static struct xe_guc * -exec_queue_to_guc(struct xe_exec_queue *q) -{ - return &q->gt->uc.guc; -} - -/* - * Helpers for engine state, using an atomic as some of the bits can transition - * as the same time (e.g. a suspend can be happning at the same time as schedule - * engine done being processed). - */ -#define EXEC_QUEUE_STATE_REGISTERED (1 << 0) -#define EXEC_QUEUE_STATE_ENABLED (1 << 1) -#define EXEC_QUEUE_STATE_PENDING_ENABLE (1 << 2) -#define EXEC_QUEUE_STATE_PENDING_DISABLE (1 << 3) -#define EXEC_QUEUE_STATE_DESTROYED (1 << 4) -#define EXEC_QUEUE_STATE_SUSPENDED (1 << 5) -#define EXEC_QUEUE_STATE_RESET (1 << 6) -#define EXEC_QUEUE_STATE_KILLED (1 << 7) -#define EXEC_QUEUE_STATE_WEDGED (1 << 8) -#define EXEC_QUEUE_STATE_BANNED (1 << 9) -#define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10) -#define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11) - -static bool exec_queue_registered(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED; -} - -static void set_exec_queue_registered(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); -} - -static void clear_exec_queue_registered(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state); -} - -static bool exec_queue_enabled(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED; -} - -static void set_exec_queue_enabled(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state); -} - -static void clear_exec_queue_enabled(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state); -} - -static bool exec_queue_pending_enable(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE; -} - -static void set_exec_queue_pending_enable(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); -} - -static void clear_exec_queue_pending_enable(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state); -} - -static bool exec_queue_pending_disable(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE; -} - -static void set_exec_queue_pending_disable(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); -} - -static void clear_exec_queue_pending_disable(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state); -} - -static bool exec_queue_destroyed(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED; -} - -static void set_exec_queue_destroyed(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state); -} - -static bool exec_queue_banned(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED; -} - -static void set_exec_queue_banned(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state); -} - -static bool exec_queue_suspended(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED; -} - -static void set_exec_queue_suspended(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); -} - -static void clear_exec_queue_suspended(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state); -} - -static bool exec_queue_reset(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET; -} - -static void set_exec_queue_reset(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state); -} - -static bool exec_queue_killed(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED; -} - -static void set_exec_queue_killed(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state); -} - -static bool exec_queue_wedged(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED; -} - -static void set_exec_queue_wedged(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state); -} - -static bool exec_queue_check_timeout(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT; -} - -static void set_exec_queue_check_timeout(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); -} - -static void clear_exec_queue_check_timeout(struct xe_exec_queue *q) -{ - atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state); -} - -static bool exec_queue_extra_ref(struct xe_exec_queue *q) -{ - return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF; -} - -static void set_exec_queue_extra_ref(struct xe_exec_queue *q) -{ - atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state); -} - -static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) -{ - return (atomic_read(&q->guc->state) & - (EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED | - EXEC_QUEUE_STATE_BANNED)); -} - -#ifdef CONFIG_PROVE_LOCKING -static int alloc_submit_wq(struct xe_guc *guc) -{ - int i; - - for (i = 0; i < NUM_SUBMIT_WQ; ++i) { - guc->submission_state.submit_wq_pool[i] = - alloc_ordered_workqueue("submit_wq", 0); - if (!guc->submission_state.submit_wq_pool[i]) - goto err_free; - } - - return 0; - -err_free: - while (i) - destroy_workqueue(guc->submission_state.submit_wq_pool[--i]); - - return -ENOMEM; -} - -static void free_submit_wq(struct xe_guc *guc) -{ - int i; - - for (i = 0; i < NUM_SUBMIT_WQ; ++i) - destroy_workqueue(guc->submission_state.submit_wq_pool[i]); -} - -static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) -{ - int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ; - - return guc->submission_state.submit_wq_pool[idx]; -} -#else -static int alloc_submit_wq(struct xe_guc *guc) -{ - return 0; -} - -static void free_submit_wq(struct xe_guc *guc) -{ - -} - -static struct workqueue_struct *get_submit_wq(struct xe_guc *guc) -{ - return NULL; -} -#endif - -static void guc_submit_fini(struct drm_device *drm, void *arg) -{ - struct xe_guc *guc = arg; - - xa_destroy(&guc->submission_state.exec_queue_lookup); - free_submit_wq(guc); -} - -static void guc_submit_wedged_fini(void *arg) -{ - struct xe_guc *guc = arg; - struct xe_exec_queue *q; - unsigned long index; - - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (exec_queue_wedged(q)) - xe_exec_queue_put(q); -} - -static const struct xe_exec_queue_ops guc_exec_queue_ops; - -static void primelockdep(struct xe_guc *guc) -{ - if (!IS_ENABLED(CONFIG_LOCKDEP)) - return; - - fs_reclaim_acquire(GFP_KERNEL); - - mutex_lock(&guc->submission_state.lock); - mutex_unlock(&guc->submission_state.lock); - - fs_reclaim_release(GFP_KERNEL); -} - -/** - * xe_guc_submit_init() - Initialize GuC submission. - * @guc: the &xe_guc to initialize - * @num_ids: number of GuC context IDs to use - * - * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all - * GuC context IDs supported by the GuC firmware should be used for submission. - * - * Only VF drivers will have to provide explicit number of GuC context IDs - * that they can use for submission. - * - * Return: 0 on success or a negative error code on failure. - */ -int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_gt *gt = guc_to_gt(guc); - int err; - - err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock); - if (err) - return err; - - err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids); - if (err) - return err; - - err = alloc_submit_wq(guc); - if (err) - return err; - - gt->exec_queue_ops = &guc_exec_queue_ops; - - xa_init(&guc->submission_state.exec_queue_lookup); - - primelockdep(guc); - - return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc); -} - -static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count) -{ - int i; - - lockdep_assert_held(&guc->submission_state.lock); - - for (i = 0; i < xa_count; ++i) - xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i); - - xe_guc_id_mgr_release_locked(&guc->submission_state.idm, - q->guc->id, q->width); -} - -static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) -{ - int ret; - void *ptr; - int i; - - /* - * Must use GFP_NOWAIT as this lock is in the dma fence signalling path, - * worse case user gets -ENOMEM on engine create and has to try again. - * - * FIXME: Have caller pre-alloc or post-alloc /w GFP_KERNEL to prevent - * failure. - */ - lockdep_assert_held(&guc->submission_state.lock); - - ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm, - q->width); - if (ret < 0) - return ret; - - q->guc->id = ret; - - for (i = 0; i < q->width; ++i) { - ptr = xa_store(&guc->submission_state.exec_queue_lookup, - q->guc->id + i, q, GFP_NOWAIT); - if (IS_ERR(ptr)) { - ret = PTR_ERR(ptr); - goto err_release; - } - } - - return 0; - -err_release: - __release_guc_id(guc, q, i); - - return ret; -} - -static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) -{ - mutex_lock(&guc->submission_state.lock); - __release_guc_id(guc, q, q->width); - mutex_unlock(&guc->submission_state.lock); -} - -struct exec_queue_policy { - u32 count; - struct guc_update_exec_queue_policy h2g; -}; - -static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy) -{ - size_t bytes = sizeof(policy->h2g.header) + - (sizeof(policy->h2g.klv[0]) * policy->count); - - return bytes / sizeof(u32); -} - -static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy, - u16 guc_id) -{ - policy->h2g.header.action = - XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES; - policy->h2g.header.guc_id = guc_id; - policy->count = 0; -} - -#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \ -static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \ - u32 data) \ -{ \ - XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \ -\ - policy->h2g.klv[policy->count].kl = \ - FIELD_PREP(GUC_KLV_0_KEY, \ - GUC_CONTEXT_POLICIES_KLV_ID_##id) | \ - FIELD_PREP(GUC_KLV_0_LEN, 1); \ - policy->h2g.klv[policy->count].value = data; \ - policy->count++; \ -} - -MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM) -MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT) -MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY) -#undef MAKE_EXEC_QUEUE_POLICY_ADD - -static const int xe_exec_queue_prio_to_guc[] = { - [XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL, - [XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL, - [XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH, - [XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH, -}; - -static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) -{ - struct exec_queue_policy policy; - struct xe_device *xe = guc_to_xe(guc); - enum xe_exec_queue_priority prio = q->sched_props.priority; - u32 timeslice_us = q->sched_props.timeslice_us; - u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; - - xe_assert(xe, exec_queue_registered(q)); - - __guc_exec_queue_policy_start_klv(&policy, q->guc->id); - __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); - __guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us); - __guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us); - - xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, - __guc_exec_queue_policy_action_size(&policy), 0, 0); -} - -static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q) -{ - struct exec_queue_policy policy; - - __guc_exec_queue_policy_start_klv(&policy, q->guc->id); - __guc_exec_queue_policy_add_preemption_timeout(&policy, 1); - - xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g, - __guc_exec_queue_policy_action_size(&policy), 0, 0); -} - -#define parallel_read(xe_, map_, field_) \ - xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ - field_) -#define parallel_write(xe_, map_, field_, val_) \ - xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \ - field_, val_) - -static void __register_mlrc_exec_queue(struct xe_guc *guc, - struct xe_exec_queue *q, - struct guc_ctxt_registration_info *info) -{ -#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) - struct xe_device *xe = guc_to_xe(guc); - u32 action[MAX_MLRC_REG_SIZE]; - int len = 0; - int i; - - xe_assert(xe, xe_exec_queue_is_parallel(q)); - - action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; - action[len++] = info->flags; - action[len++] = info->context_idx; - action[len++] = info->engine_class; - action[len++] = info->engine_submit_mask; - action[len++] = info->wq_desc_lo; - action[len++] = info->wq_desc_hi; - action[len++] = info->wq_base_lo; - action[len++] = info->wq_base_hi; - action[len++] = info->wq_size; - action[len++] = q->width; - action[len++] = info->hwlrca_lo; - action[len++] = info->hwlrca_hi; - - for (i = 1; i < q->width; ++i) { - struct xe_lrc *lrc = q->lrc[i]; - - action[len++] = lower_32_bits(xe_lrc_descriptor(lrc)); - action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); - } - - xe_assert(xe, len <= MAX_MLRC_REG_SIZE); -#undef MAX_MLRC_REG_SIZE - - xe_guc_ct_send(&guc->ct, action, len, 0, 0); -} - -static void __register_exec_queue(struct xe_guc *guc, - struct guc_ctxt_registration_info *info) -{ - u32 action[] = { - XE_GUC_ACTION_REGISTER_CONTEXT, - info->flags, - info->context_idx, - info->engine_class, - info->engine_submit_mask, - info->wq_desc_lo, - info->wq_desc_hi, - info->wq_base_lo, - info->wq_base_hi, - info->wq_size, - info->hwlrca_lo, - info->hwlrca_hi, - }; - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0); -} - -static void register_exec_queue(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct xe_lrc *lrc = q->lrc[0]; - struct guc_ctxt_registration_info info; - - xe_assert(xe, !exec_queue_registered(q)); - - memset(&info, 0, sizeof(info)); - info.context_idx = q->guc->id; - info.engine_class = xe_engine_class_to_guc_class(q->class); - info.engine_submit_mask = q->logical_mask; - info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc)); - info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc)); - info.flags = CONTEXT_REGISTRATION_FLAG_KMD; - - if (xe_exec_queue_is_parallel(q)) { - u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc); - struct iosys_map map = xe_lrc_parallel_map(lrc); - - info.wq_desc_lo = lower_32_bits(ggtt_addr + - offsetof(struct guc_submit_parallel_scratch, wq_desc)); - info.wq_desc_hi = upper_32_bits(ggtt_addr + - offsetof(struct guc_submit_parallel_scratch, wq_desc)); - info.wq_base_lo = lower_32_bits(ggtt_addr + - offsetof(struct guc_submit_parallel_scratch, wq[0])); - info.wq_base_hi = upper_32_bits(ggtt_addr + - offsetof(struct guc_submit_parallel_scratch, wq[0])); - info.wq_size = WQ_SIZE; - - q->guc->wqi_head = 0; - q->guc->wqi_tail = 0; - xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE); - parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE); - } - - /* - * We must keep a reference for LR engines if engine is registered with - * the GuC as jobs signal immediately and can't destroy an engine if the - * GuC has a reference to it. - */ - if (xe_exec_queue_is_lr(q)) - xe_exec_queue_get(q); - - set_exec_queue_registered(q); - trace_xe_exec_queue_register(q); - if (xe_exec_queue_is_parallel(q)) - __register_mlrc_exec_queue(guc, q, &info); - else - __register_exec_queue(guc, &info); - init_policies(guc, q); -} - -static u32 wq_space_until_wrap(struct xe_exec_queue *q) -{ - return (WQ_SIZE - q->guc->wqi_tail); -} - -static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); - unsigned int sleep_period_ms = 1; - -#define AVAILABLE_SPACE \ - CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE) - if (wqi_size > AVAILABLE_SPACE) { -try_again: - q->guc->wqi_head = parallel_read(xe, map, wq_desc.head); - if (wqi_size > AVAILABLE_SPACE) { - if (sleep_period_ms == 1024) { - xe_gt_reset_async(q->gt); - return -ENODEV; - } - - msleep(sleep_period_ms); - sleep_period_ms <<= 1; - goto try_again; - } - } -#undef AVAILABLE_SPACE - - return 0; -} - -static int wq_noop_append(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); - u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1; - - if (wq_wait_for_space(q, wq_space_until_wrap(q))) - return -ENODEV; - - xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); - - parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], - FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | - FIELD_PREP(WQ_LEN_MASK, len_dw)); - q->guc->wqi_tail = 0; - - return 0; -} - -static void wq_item_append(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); -#define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */ - u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)]; - u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32); - u32 len_dw = (wqi_size / sizeof(u32)) - 1; - int i = 0, j; - - if (wqi_size > wq_space_until_wrap(q)) { - if (wq_noop_append(q)) - return; - } - if (wq_wait_for_space(q, wqi_size)) - return; - - wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) | - FIELD_PREP(WQ_LEN_MASK, len_dw); - wqi[i++] = xe_lrc_descriptor(q->lrc[0]); - wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) | - FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64)); - wqi[i++] = 0; - for (j = 1; j < q->width; ++j) { - struct xe_lrc *lrc = q->lrc[j]; - - wqi[i++] = lrc->ring.tail / sizeof(u64); - } - - xe_assert(xe, i == wqi_size / sizeof(u32)); - - iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, - wq[q->guc->wqi_tail / sizeof(u32)])); - xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); - q->guc->wqi_tail += wqi_size; - xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); - - xe_device_wmb(xe); - - map = xe_lrc_parallel_map(q->lrc[0]); - parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail); -} - -#define RESUME_PENDING ~0x0ull -static void submit_exec_queue(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct xe_lrc *lrc = q->lrc[0]; - u32 action[3]; - u32 g2h_len = 0; - u32 num_g2h = 0; - int len = 0; - bool extra_submit = false; - - xe_assert(xe, exec_queue_registered(q)); - - if (xe_exec_queue_is_parallel(q)) - wq_item_append(q); - else - xe_lrc_set_ring_tail(lrc, lrc->ring.tail); - - if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q)) - return; - - if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) { - action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET; - action[len++] = q->guc->id; - action[len++] = GUC_CONTEXT_ENABLE; - g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; - num_g2h = 1; - if (xe_exec_queue_is_parallel(q)) - extra_submit = true; - - q->guc->resume_time = RESUME_PENDING; - set_exec_queue_pending_enable(q); - set_exec_queue_enabled(q); - trace_xe_exec_queue_scheduling_enable(q); - } else { - action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; - action[len++] = q->guc->id; - trace_xe_exec_queue_submit(q); - } - - xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h); - - if (extra_submit) { - len = 0; - action[len++] = XE_GUC_ACTION_SCHED_CONTEXT; - action[len++] = q->guc->id; - trace_xe_exec_queue_submit(q); - - xe_guc_ct_send(&guc->ct, action, len, 0, 0); - } -} - -static struct dma_fence * -guc_exec_queue_run_job(struct drm_sched_job *drm_job) -{ - struct xe_sched_job *job = to_xe_sched_job(drm_job); - struct xe_exec_queue *q = job->q; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - bool lr = xe_exec_queue_is_lr(q); - - xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || - exec_queue_banned(q) || exec_queue_suspended(q)); - - trace_xe_sched_job_run(job); - - if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) { - if (!exec_queue_registered(q)) - register_exec_queue(q); - if (!lr) /* LR jobs are emitted in the exec IOCTL */ - q->ring_ops->emit_job(job); - submit_exec_queue(q); - } - - if (lr) { - xe_sched_job_set_error(job, -EOPNOTSUPP); - return NULL; - } else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags)) { - return job->fence; - } else { - return dma_fence_get(job->fence); - } -} - -static void guc_exec_queue_free_job(struct drm_sched_job *drm_job) -{ - struct xe_sched_job *job = to_xe_sched_job(drm_job); - - xe_exec_queue_update_run_ticks(job->q); - - trace_xe_sched_job_free(job); - xe_sched_job_put(job); -} - -static int guc_read_stopped(struct xe_guc *guc) -{ - return atomic_read(&guc->submission_state.stopped); -} - -#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \ - u32 action[] = { \ - XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \ - q->guc->id, \ - GUC_CONTEXT_##enable_disable, \ - } - -static void disable_scheduling_deregister(struct xe_guc *guc, - struct xe_exec_queue *q) -{ - MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); - struct xe_device *xe = guc_to_xe(guc); - int ret; - - set_min_preemption_timeout(guc, q); - smp_rmb(); - ret = wait_event_timeout(guc->ct.wq, !exec_queue_pending_enable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret) { - struct xe_gpu_scheduler *sched = &q->guc->sched; - - drm_warn(&xe->drm, "Pending enable failed to respond"); - xe_sched_submission_start(sched); - xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(sched); - return; - } - - clear_exec_queue_enabled(q); - set_exec_queue_pending_disable(q); - set_exec_queue_destroyed(q); - trace_xe_exec_queue_scheduling_disable(q); - - /* - * Reserve space for both G2H here as the 2nd G2H is sent from a G2H - * handler and we are not allowed to reserved G2H space in handlers. - */ - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET + - G2H_LEN_DW_DEREGISTER_CONTEXT, 2); -} - -static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - /** to wakeup xe_wait_user_fence ioctl if exec queue is reset */ - wake_up_all(&xe->ufence_wq); - - if (xe_exec_queue_is_lr(q)) - queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr); - else - xe_sched_tdr_queue_imm(&q->guc->sched); -} - -/** - * xe_guc_submit_wedge() - Wedge GuC submission - * @guc: the GuC object - * - * Save exec queue's registered with GuC state by taking a ref to each queue. - * Register a DRMM handler to drop refs upon driver unload. - */ -void xe_guc_submit_wedge(struct xe_guc *guc) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - unsigned long index; - int err; - - xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); - - err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, - guc_submit_wedged_fini, guc); - if (err) { - drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); - return; - } - - mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - if (xe_exec_queue_get_unless_zero(q)) - set_exec_queue_wedged(q); - mutex_unlock(&guc->submission_state.lock); -} - -static bool guc_submit_hint_wedged(struct xe_guc *guc) -{ - struct xe_device *xe = guc_to_xe(guc); - - if (xe->wedged.mode != 2) - return false; - - if (xe_device_wedged(xe)) - return true; - - xe_device_declare_wedged(xe); - - return true; -} - -static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) -{ - struct xe_guc_exec_queue *ge = - container_of(w, struct xe_guc_exec_queue, lr_tdr); - struct xe_exec_queue *q = ge->q; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; - - xe_assert(xe, xe_exec_queue_is_lr(q)); - trace_xe_exec_queue_lr_cleanup(q); - - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); - - /* Kill the run_job / process_msg entry points */ - xe_sched_submission_stop(sched); - - /* - * Engine state now mostly stable, disable scheduling / deregister if - * needed. This cleanup routine might be called multiple times, where - * the actual async engine deregister drops the final engine ref. - * Calling disable_scheduling_deregister will mark the engine as - * destroyed and fire off the CT requests to disable scheduling / - * deregister, which we only want to do once. We also don't want to mark - * the engine as pending_disable again as this may race with the - * xe_guc_deregister_done_handler() which treats it as an unexpected - * state. - */ - if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { - struct xe_guc *guc = exec_queue_to_guc(q); - int ret; - - set_exec_queue_banned(q); - disable_scheduling_deregister(guc, q); - - /* - * Must wait for scheduling to be disabled before signalling - * any fences, if GT broken the GT reset code should signal us. - */ - ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_disable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret) { - drm_warn(&xe->drm, "Schedule disable failed to respond"); - xe_sched_submission_start(sched); - xe_gt_reset_async(q->gt); - return; - } - } - - xe_sched_submission_start(sched); -} - -#define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100) - -static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job) -{ - struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q)); - u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]); - u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]); - u32 timeout_ms = q->sched_props.job_timeout_ms; - u32 diff; - u64 running_time_ms; - - /* - * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch - * possible overflows with a high timeout. - */ - xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC); - - if (ctx_timestamp < ctx_job_timestamp) - diff = ctx_timestamp + U32_MAX - ctx_job_timestamp; - else - diff = ctx_timestamp - ctx_job_timestamp; - - /* - * Ensure timeout is within 5% to account for an GuC scheduling latency - */ - running_time_ms = - ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff)); - - xe_gt_dbg(gt, - "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, running_time_ms, timeout_ms, diff); - - return running_time_ms >= timeout_ms; -} - -static void enable_scheduling(struct xe_exec_queue *q) -{ - MAKE_SCHED_CONTEXT_ACTION(q, ENABLE); - struct xe_guc *guc = exec_queue_to_guc(q); - int ret; - - xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); - - set_exec_queue_pending_enable(q); - set_exec_queue_enabled(q); - trace_xe_exec_queue_scheduling_enable(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); - - ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_enable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret || guc_read_stopped(guc)) { - xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond"); - set_exec_queue_banned(q); - xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(&q->guc->sched); - } -} - -static void disable_scheduling(struct xe_exec_queue *q, bool immediate) -{ - MAKE_SCHED_CONTEXT_ACTION(q, DISABLE); - struct xe_guc *guc = exec_queue_to_guc(q); - - xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - - if (immediate) - set_min_preemption_timeout(guc, q); - clear_exec_queue_enabled(q); - set_exec_queue_pending_disable(q); - trace_xe_exec_queue_scheduling_disable(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1); -} - -static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) -{ - u32 action[] = { - XE_GUC_ACTION_DEREGISTER_CONTEXT, - q->guc->id, - }; - - xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q)); - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - - set_exec_queue_destroyed(q); - trace_xe_exec_queue_deregister(q); - - xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), - G2H_LEN_DW_DEREGISTER_CONTEXT, 1); -} - -static enum drm_gpu_sched_stat -guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) -{ - struct xe_sched_job *job = to_xe_sched_job(drm_job); - struct xe_sched_job *tmp_job; - struct xe_exec_queue *q = job->q; - struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_guc *guc = exec_queue_to_guc(q); -<<<<<<< -======= - const char *process_name = "no process"; ->>>>>>> - int err = -ETIME; - int i = 0; - bool wedged, skip_timeout_check; - - /* - * TDR has fired before free job worker. Common if exec queue - * immediately closed after last fence signaled. - */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - guc_exec_queue_free_job(drm_job); - - return DRM_GPU_SCHED_STAT_NOMINAL; - } - - /* Kill the run_job entry point */ - xe_sched_submission_stop(sched); - - /* Must check all state after stopping scheduler */ - skip_timeout_check = exec_queue_reset(q) || - exec_queue_killed_or_banned_or_wedged(q) || - exec_queue_destroyed(q); - - /* Job hasn't started, can't be timed out */ - if (!skip_timeout_check && !xe_sched_job_started(job)) - goto rearm; - - /* - * XXX: Sampling timeout doesn't work in wedged mode as we have to - * modify scheduling state to read timestamp. We could read the - * timestamp from a register to accumulate current running time but this - * doesn't work for SRIOV. For now assuming timeouts in wedged mode are - * genuine timeouts. - */ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); - - /* Engine state now stable, disable scheduling to check timestamp */ - if (!wedged && exec_queue_registered(q)) { - int ret; - - if (exec_queue_reset(q)) - err = -EIO; - - if (!exec_queue_destroyed(q)) { - /* - * Wait for any pending G2H to flush out before - * modifying state - */ - ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_enable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret || guc_read_stopped(guc)) - goto trigger_reset; - - /* - * Flag communicates to G2H handler that schedule - * disable originated from a timeout check. The G2H then - * avoid triggering cleanup or deregistering the exec - * queue. - */ - set_exec_queue_check_timeout(q); - disable_scheduling(q, skip_timeout_check); - } - - /* - * Must wait for scheduling to be disabled before signalling - * any fences, if GT broken the GT reset code should signal us. - * - * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault - * error) messages which can cause the schedule disable to get - * lost. If this occurs, trigger a GT reset to recover. - */ - smp_rmb(); - ret = wait_event_timeout(guc->ct.wq, - !exec_queue_pending_disable(q) || - guc_read_stopped(guc), HZ * 5); - if (!ret || guc_read_stopped(guc)) { -trigger_reset: - if (!ret) - xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond"); - set_exec_queue_extra_ref(q); - xe_exec_queue_get(q); /* GT reset owns this */ - set_exec_queue_banned(q); - xe_gt_reset_async(q->gt); - xe_sched_tdr_queue_imm(sched); - goto rearm; - } - } - - /* - * Check if job is actually timed out, if so restart job execution and TDR - */ - if (!wedged && !skip_timeout_check && !check_timeout(q, job) && - !exec_queue_reset(q) && exec_queue_registered(q)) { - clear_exec_queue_check_timeout(q); - goto sched_enable; - } - -<<<<<<< - if (q->vm && q->vm->xef) { - process_name = q->vm->xef->process_name; - pid = q->vm->xef->pid; - } - xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, q->flags, process_name, pid); - -======= - xe_gt_notice(guc_to_gt(guc), "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", - xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), - q->guc->id, q->flags); ->>>>>>> - trace_xe_sched_job_timedout(job); - - if (!exec_queue_killed(q)) - xe_devcoredump(job); - - /* - * Kernel jobs should never fail, nor should VM jobs if they do - * somethings has gone wrong and the GT needs a reset - */ - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL, - "Kernel-submitted job timed out\n"); - xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q), - "VM job timed out on non-killed execqueue\n"); - if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL || - (q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) { - if (!xe_sched_invalidate_job(job, 2)) { - clear_exec_queue_check_timeout(q); - xe_gt_reset_async(q->gt); - goto rearm; - } - } - - /* Finish cleaning up exec queue via deregister */ - set_exec_queue_banned(q); - if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) { - set_exec_queue_extra_ref(q); - xe_exec_queue_get(q); - __deregister_exec_queue(guc, q); - } - - /* Stop fence signaling */ - xe_hw_fence_irq_stop(q->fence_irq); - - /* - * Fence state now stable, stop / start scheduler which cleans up any - * fences that are complete - */ - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - - xe_guc_exec_queue_trigger_cleanup(q); - - /* Mark all outstanding jobs as bad, thus completing them */ - spin_lock(&sched->base.job_list_lock); - list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list) - xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED); - spin_unlock(&sched->base.job_list_lock); - - /* Start fence signaling */ - xe_hw_fence_irq_start(q->fence_irq); - - return DRM_GPU_SCHED_STAT_NOMINAL; - -sched_enable: - enable_scheduling(q); -rearm: - /* - * XXX: Ideally want to adjust timeout based on current exection time - * but there is not currently an easy way to do in DRM scheduler. With - * some thought, do this in a follow up. - */ - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_NOMINAL; -} - -static void __guc_exec_queue_fini_async(struct work_struct *w) -{ - struct xe_guc_exec_queue *ge = - container_of(w, struct xe_guc_exec_queue, fini_async); - struct xe_exec_queue *q = ge->q; - struct xe_guc *guc = exec_queue_to_guc(q); - - xe_pm_runtime_get(guc_to_xe(guc)); - trace_xe_exec_queue_destroy(q); - - if (xe_exec_queue_is_lr(q)) - cancel_work_sync(&ge->lr_tdr); - release_guc_id(guc, q); - xe_sched_entity_fini(&ge->entity); - xe_sched_fini(&ge->sched); - - kfree(ge); - xe_exec_queue_fini(q); - xe_pm_runtime_put(guc_to_xe(guc)); -} - -static void guc_exec_queue_fini_async(struct xe_exec_queue *q) -{ - INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async); - - /* We must block on kernel engines so slabs are empty on driver unload */ - if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q)) - __guc_exec_queue_fini_async(&q->guc->fini_async); - else - queue_work(system_wq, &q->guc->fini_async); -} - -static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q) -{ - /* - * Might be done from within the GPU scheduler, need to do async as we - * fini the scheduler when the engine is fini'd, the scheduler can't - * complete fini within itself (circular dependency). Async resolves - * this we and don't really care when everything is fini'd, just that it - * is. - */ - guc_exec_queue_fini_async(q); -} - -static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) -{ - struct xe_exec_queue *q = msg->private_data; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); - trace_xe_exec_queue_cleanup_entity(q); - - if (exec_queue_registered(q)) - disable_scheduling_deregister(guc, q); - else - __guc_exec_queue_fini(guc, q); -} - -static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) -{ - return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); -} - -static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) -{ - struct xe_exec_queue *q = msg->private_data; - struct xe_guc *guc = exec_queue_to_guc(q); - - if (guc_exec_queue_allowed_to_change_state(q)) - init_policies(guc, q); - kfree(msg); -} - -static void suspend_fence_signal(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) || - guc_read_stopped(guc)); - xe_assert(xe, q->guc->suspend_pending); - - q->guc->suspend_pending = false; - smp_wmb(); - wake_up(&q->guc->suspend_wait); -} - -static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) -{ - struct xe_exec_queue *q = msg->private_data; - struct xe_guc *guc = exec_queue_to_guc(q); - - if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && - exec_queue_enabled(q)) { - wait_event(guc->ct.wq, q->guc->resume_time != RESUME_PENDING || - guc_read_stopped(guc)); - - if (!guc_read_stopped(guc)) { - s64 since_resume_ms = - ktime_ms_delta(ktime_get(), - q->guc->resume_time); - s64 wait_ms = q->vm->preempt.min_run_period_ms - - since_resume_ms; - - if (wait_ms > 0 && q->guc->resume_time) - msleep(wait_ms); - - set_exec_queue_suspended(q); - disable_scheduling(q, false); - } - } else if (q->guc->suspend_pending) { - set_exec_queue_suspended(q); - suspend_fence_signal(q); - } -} - -static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) -{ - struct xe_exec_queue *q = msg->private_data; - - if (guc_exec_queue_allowed_to_change_state(q)) { - q->guc->resume_time = RESUME_PENDING; - clear_exec_queue_suspended(q); - enable_scheduling(q); - } else { - clear_exec_queue_suspended(q); - } -} - -#define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ -#define SET_SCHED_PROPS 2 -#define SUSPEND 3 -#define RESUME 4 - -static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) -{ - trace_xe_sched_msg_recv(msg); - - switch (msg->opcode) { - case CLEANUP: - __guc_exec_queue_process_msg_cleanup(msg); - break; - case SET_SCHED_PROPS: - __guc_exec_queue_process_msg_set_sched_props(msg); - break; - case SUSPEND: - __guc_exec_queue_process_msg_suspend(msg); - break; - case RESUME: - __guc_exec_queue_process_msg_resume(msg); - break; - default: - XE_WARN_ON("Unknown message type"); - } - - xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data))); -} - -static const struct drm_sched_backend_ops drm_sched_ops = { - .run_job = guc_exec_queue_run_job, - .free_job = guc_exec_queue_free_job, - .timedout_job = guc_exec_queue_timedout_job, -}; - -static const struct xe_sched_backend_ops xe_sched_ops = { - .process_msg = guc_exec_queue_process_msg, -}; - -static int guc_exec_queue_init(struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct xe_guc_exec_queue *ge; - long timeout; - int err; - - xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); - - ge = kzalloc(sizeof(*ge), GFP_KERNEL); - if (!ge) - return -ENOMEM; - - q->guc = ge; - ge->q = q; - init_waitqueue_head(&ge->suspend_wait); - - timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : - msecs_to_jiffies(q->sched_props.job_timeout_ms); - err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, - get_submit_wq(guc), - q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64, - timeout, guc_to_gt(guc)->ordered_wq, NULL, - q->name, gt_to_xe(q->gt)->drm.dev); - if (err) - goto err_free; - - sched = &ge->sched; - err = xe_sched_entity_init(&ge->entity, sched); - if (err) - goto err_sched; - - if (xe_exec_queue_is_lr(q)) - INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); - - mutex_lock(&guc->submission_state.lock); - - err = alloc_guc_id(guc, q); - if (err) - goto err_entity; - - q->entity = &ge->entity; - - if (guc_read_stopped(guc)) - xe_sched_stop(sched); - - mutex_unlock(&guc->submission_state.lock); - - xe_exec_queue_assign_name(q, q->guc->id); - - trace_xe_exec_queue_create(q); - - return 0; - -err_entity: - mutex_unlock(&guc->submission_state.lock); - xe_sched_entity_fini(&ge->entity); -err_sched: - xe_sched_fini(&ge->sched); -err_free: - kfree(ge); - - return err; -} - -static void guc_exec_queue_kill(struct xe_exec_queue *q) -{ - trace_xe_exec_queue_kill(q); - set_exec_queue_killed(q); - xe_guc_exec_queue_trigger_cleanup(q); -} - -static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, - u32 opcode) -{ - xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); - - INIT_LIST_HEAD(&msg->link); - msg->opcode = opcode; - msg->private_data = q; - - trace_xe_sched_msg_add(msg); - xe_sched_add_msg(&q->guc->sched, msg); -} - -#define STATIC_MSG_CLEANUP 0 -#define STATIC_MSG_SUSPEND 1 -#define STATIC_MSG_RESUME 2 -static void guc_exec_queue_fini(struct xe_exec_queue *q) -{ - struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; - - if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) - guc_exec_queue_add_msg(q, msg, CLEANUP); - else - __guc_exec_queue_fini(exec_queue_to_guc(q), q); -} - -static int guc_exec_queue_set_priority(struct xe_exec_queue *q, - enum xe_exec_queue_priority priority) -{ - struct xe_sched_msg *msg; - - if (q->sched_props.priority == priority || - exec_queue_killed_or_banned_or_wedged(q)) - return 0; - - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - return -ENOMEM; - - q->sched_props.priority = priority; - guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); - - return 0; -} - -static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) -{ - struct xe_sched_msg *msg; - - if (q->sched_props.timeslice_us == timeslice_us || - exec_queue_killed_or_banned_or_wedged(q)) - return 0; - - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - return -ENOMEM; - - q->sched_props.timeslice_us = timeslice_us; - guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); - - return 0; -} - -static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, - u32 preempt_timeout_us) -{ - struct xe_sched_msg *msg; - - if (q->sched_props.preempt_timeout_us == preempt_timeout_us || - exec_queue_killed_or_banned_or_wedged(q)) - return 0; - - msg = kmalloc(sizeof(*msg), GFP_KERNEL); - if (!msg) - return -ENOMEM; - - q->sched_props.preempt_timeout_us = preempt_timeout_us; - guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); - - return 0; -} - -static int guc_exec_queue_suspend(struct xe_exec_queue *q) -{ - struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; - - if (exec_queue_killed_or_banned_or_wedged(q) || q->guc->suspend_pending) - return -EINVAL; - - q->guc->suspend_pending = true; - guc_exec_queue_add_msg(q, msg, SUSPEND); - - return 0; -} - -static void guc_exec_queue_suspend_wait(struct xe_exec_queue *q) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - - wait_event(q->guc->suspend_wait, !q->guc->suspend_pending || - guc_read_stopped(guc)); -} - -static void guc_exec_queue_resume(struct xe_exec_queue *q) -{ - struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, !q->guc->suspend_pending); - - guc_exec_queue_add_msg(q, msg, RESUME); -} - -static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) -{ - return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); -} - -/* - * All of these functions are an abstraction layer which other parts of XE can - * use to trap into the GuC backend. All of these functions, aside from init, - * really shouldn't do much other than trap into the DRM scheduler which - * synchronizes these operations. - */ -static const struct xe_exec_queue_ops guc_exec_queue_ops = { - .init = guc_exec_queue_init, - .kill = guc_exec_queue_kill, - .fini = guc_exec_queue_fini, - .set_priority = guc_exec_queue_set_priority, - .set_timeslice = guc_exec_queue_set_timeslice, - .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, - .suspend = guc_exec_queue_suspend, - .suspend_wait = guc_exec_queue_suspend_wait, - .resume = guc_exec_queue_resume, - .reset_status = guc_exec_queue_reset_status, -}; - -static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched = &q->guc->sched; - - /* Stop scheduling + flush any DRM scheduler operations */ - xe_sched_submission_stop(sched); - - /* Clean up lost G2H + reset engine state */ - if (exec_queue_registered(q)) { - if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) - xe_exec_queue_put(q); - else if (exec_queue_destroyed(q)) - __guc_exec_queue_fini(guc, q); - } - if (q->guc->suspend_pending) { - set_exec_queue_suspended(q); - suspend_fence_signal(q); - } - atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED | - EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED | - EXEC_QUEUE_STATE_SUSPENDED, - &q->guc->state); - q->guc->resume_time = 0; - trace_xe_exec_queue_stop(q); - - /* - * Ban any engine (aside from kernel and engines used for VM ops) with a - * started but not complete job or if a job has gone through a GT reset - * more than twice. - */ - if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { - struct xe_sched_job *job = xe_sched_first_pending_job(sched); - bool ban = false; - - if (job) { - if ((xe_sched_job_started(job) && - !xe_sched_job_completed(job)) || - xe_sched_invalidate_job(job, 2)) { - trace_xe_sched_job_ban(job); - ban = true; - } - } else if (xe_exec_queue_is_lr(q) && - (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) { - ban = true; - } - - if (ban) { - set_exec_queue_banned(q); - xe_guc_exec_queue_trigger_cleanup(q); - } - } -} - -int xe_guc_submit_reset_prepare(struct xe_guc *guc) -{ - int ret; - - /* - * Using an atomic here rather than submission_state.lock as this - * function can be called while holding the CT lock (engine reset - * failure). submission_state.lock needs the CT lock to resubmit jobs. - * Atomic is not ideal, but it works to prevent against concurrent reset - * and releasing any TDRs waiting on guc->submission_state.stopped. - */ - ret = atomic_fetch_or(1, &guc->submission_state.stopped); - smp_wmb(); - wake_up_all(&guc->ct.wq); - - return ret; -} - -void xe_guc_submit_reset_wait(struct xe_guc *guc) -{ - wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || - !guc_read_stopped(guc)); -} - -void xe_guc_submit_stop(struct xe_guc *guc) -{ - struct xe_exec_queue *q; - unsigned long index; - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, guc_read_stopped(guc) == 1); - - mutex_lock(&guc->submission_state.lock); - - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - guc_exec_queue_stop(guc, q); - - mutex_unlock(&guc->submission_state.lock); - - /* - * No one can enter the backend at this point, aside from new engine - * creation which is protected by guc->submission_state.lock. - */ - -} - -static void guc_exec_queue_start(struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched = &q->guc->sched; - - if (!exec_queue_killed_or_banned_or_wedged(q)) { - int i; - - trace_xe_exec_queue_resubmit(q); - for (i = 0; i < q->width; ++i) - xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail); - xe_sched_resubmit_jobs(sched); - } - - xe_sched_submission_start(sched); -} - -int xe_guc_submit_start(struct xe_guc *guc) -{ - struct xe_exec_queue *q; - unsigned long index; - struct xe_device *xe = guc_to_xe(guc); - - xe_assert(xe, guc_read_stopped(guc) == 1); - - mutex_lock(&guc->submission_state.lock); - atomic_dec(&guc->submission_state.stopped); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - guc_exec_queue_start(q); - mutex_unlock(&guc->submission_state.lock); - - wake_up_all(&guc->ct.wq); - - return 0; -} - -static struct xe_exec_queue * -g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - - if (unlikely(guc_id >= GUC_ID_MAX)) { - drm_err(&xe->drm, "Invalid guc_id %u", guc_id); - return NULL; - } - - q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id); - if (unlikely(!q)) { - drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); - return NULL; - } - - xe_assert(xe, guc_id >= q->guc->id); - xe_assert(xe, guc_id < (q->guc->id + q->width)); - - return q; -} - -static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) -{ - u32 action[] = { - XE_GUC_ACTION_DEREGISTER_CONTEXT, - q->guc->id, - }; - - xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q)); - xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); - xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); - - trace_xe_exec_queue_deregister(q); - - xe_guc_ct_send_g2h_handler(&guc->ct, action, ARRAY_SIZE(action)); -} - -static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, - u32 runnable_state) -{ - trace_xe_exec_queue_scheduling_done(q); - - if (runnable_state == 1) { - xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); - - q->guc->resume_time = ktime_get(); - clear_exec_queue_pending_enable(q); - smp_wmb(); - wake_up_all(&guc->ct.wq); - } else { - bool check_timeout = exec_queue_check_timeout(q); - - xe_gt_assert(guc_to_gt(guc), runnable_state == 0); - xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); - - clear_exec_queue_pending_disable(q); - if (q->guc->suspend_pending) { - suspend_fence_signal(q); - } else { - if (exec_queue_banned(q) || check_timeout) { - smp_wmb(); - wake_up_all(&guc->ct.wq); - } - if (!check_timeout) - deregister_exec_queue(guc, q); - } - } -} - -int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - u32 runnable_state = msg[1]; - - if (unlikely(len < 2)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - if (unlikely(!exec_queue_pending_enable(q) && - !exec_queue_pending_disable(q))) { - xe_gt_err(guc_to_gt(guc), - "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u", - atomic_read(&q->guc->state), q->guc->id, - runnable_state); - return -EPROTO; - } - - handle_sched_done(guc, q, runnable_state); - - return 0; -} - -static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) -{ - trace_xe_exec_queue_deregister_done(q); - - clear_exec_queue_registered(q); - - if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) - xe_exec_queue_put(q); - else - __guc_exec_queue_fini(guc, q); -} - -int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || - exec_queue_pending_enable(q) || exec_queue_enabled(q)) { - xe_gt_err(guc_to_gt(guc), - "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d", - atomic_read(&q->guc->state), q->guc->id); - return -EPROTO; - } - - handle_deregister_done(guc, q); - - return 0; -} - -int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); - - /* FIXME: Do error capture, most likely async */ - - trace_xe_exec_queue_reset(q); - - /* - * A banned engine is a NOP at this point (came from - * guc_exec_queue_timedout_job). Otherwise, kick drm scheduler to cancel - * jobs by setting timeout of the job to the minimum value kicking - * guc_exec_queue_timedout_job. - */ - set_exec_queue_reset(q); - if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) - xe_guc_exec_queue_trigger_cleanup(q); - - return 0; -} - -int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, - u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = guc_to_xe(guc); - struct xe_exec_queue *q; - u32 guc_id = msg[0]; - - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - q = g2h_exec_queue_lookup(guc, guc_id); - if (unlikely(!q)) - return -EPROTO; - - xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); - - trace_xe_exec_queue_memory_cat_error(q); - - /* Treat the same as engine reset */ - set_exec_queue_reset(q); - if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) - xe_guc_exec_queue_trigger_cleanup(q); - - return 0; -} - -int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_device *xe = guc_to_xe(guc); - u8 guc_class, instance; - u32 reason; - - if (unlikely(len != 3)) { - drm_err(&xe->drm, "Invalid length %u", len); - return -EPROTO; - } - - guc_class = msg[0]; - instance = msg[1]; - reason = msg[2]; - - /* Unexpected failure of a hardware feature, log an actual error */ - drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", - guc_class, instance, reason); - - xe_gt_reset_async(guc_to_gt(guc)); - - return 0; -} - -static void -guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, - struct xe_guc_submit_exec_queue_snapshot *snapshot) -{ - struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]); - int i; - - snapshot->guc.wqi_head = q->guc->wqi_head; - snapshot->guc.wqi_tail = q->guc->wqi_tail; - snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); - snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); - snapshot->parallel.wq_desc.status = parallel_read(xe, map, - wq_desc.wq_status); - - if (snapshot->parallel.wq_desc.head != - snapshot->parallel.wq_desc.tail) { - for (i = snapshot->parallel.wq_desc.head; - i != snapshot->parallel.wq_desc.tail; - i = (i + sizeof(u32)) % WQ_SIZE) - snapshot->parallel.wq[i / sizeof(u32)] = - parallel_read(xe, map, wq[i / sizeof(u32)]); - } -} - -static void -guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, - struct drm_printer *p) -{ - int i; - - drm_printf(p, "\tWQ head: %u (internal), %d (memory)\n", - snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); - drm_printf(p, "\tWQ tail: %u (internal), %d (memory)\n", - snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); - drm_printf(p, "\tWQ status: %u\n", snapshot->parallel.wq_desc.status); - - if (snapshot->parallel.wq_desc.head != - snapshot->parallel.wq_desc.tail) { - for (i = snapshot->parallel.wq_desc.head; - i != snapshot->parallel.wq_desc.tail; - i = (i + sizeof(u32)) % WQ_SIZE) - drm_printf(p, "\tWQ[%zu]: 0x%08x\n", i / sizeof(u32), - snapshot->parallel.wq[i / sizeof(u32)]); - } -} - -/** - * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. - * @q: faulty exec queue - * - * This can be printed out in a later stage like during dev_coredump - * analysis. - * - * Returns: a GuC Submit Engine snapshot object that must be freed by the - * caller, using `xe_guc_exec_queue_snapshot_free`. - */ -struct xe_guc_submit_exec_queue_snapshot * -xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) -{ - struct xe_gpu_scheduler *sched = &q->guc->sched; - struct xe_guc_submit_exec_queue_snapshot *snapshot; - int i; - - snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); - - if (!snapshot) - return NULL; - - snapshot->guc.id = q->guc->id; - memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); - snapshot->class = q->class; - snapshot->logical_mask = q->logical_mask; - snapshot->width = q->width; - snapshot->refcount = kref_read(&q->refcount); - snapshot->sched_timeout = sched->base.timeout; - snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; - snapshot->sched_props.preempt_timeout_us = - q->sched_props.preempt_timeout_us; - - snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *), - GFP_ATOMIC); - - if (snapshot->lrc) { - for (i = 0; i < q->width; ++i) { - struct xe_lrc *lrc = q->lrc[i]; - - snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); - } - } - - snapshot->schedule_state = atomic_read(&q->guc->state); - snapshot->exec_queue_flags = q->flags; - - snapshot->parallel_execution = xe_exec_queue_is_parallel(q); - if (snapshot->parallel_execution) - guc_exec_queue_wq_snapshot_capture(q, snapshot); - - spin_lock(&sched->base.job_list_lock); - snapshot->pending_list_size = list_count_nodes(&sched->base.pending_list); - snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, - sizeof(struct pending_list_snapshot), - GFP_ATOMIC); - - if (snapshot->pending_list) { - struct xe_sched_job *job_iter; - - i = 0; - list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) { - snapshot->pending_list[i].seqno = - xe_sched_job_seqno(job_iter); - snapshot->pending_list[i].fence = - dma_fence_is_signaled(job_iter->fence) ? 1 : 0; - snapshot->pending_list[i].finished = - dma_fence_is_signaled(&job_iter->drm.s_fence->finished) - ? 1 : 0; - i++; - } - } - - spin_unlock(&sched->base.job_list_lock); - - return snapshot; -} - -/** - * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. - * @snapshot: Previously captured snapshot of job. - * - * This captures some data that requires taking some locks, so it cannot be done in signaling path. - */ -void -xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) -{ - int i; - - if (!snapshot || !snapshot->lrc) - return; - - for (i = 0; i < snapshot->width; ++i) - xe_lrc_snapshot_capture_delayed(snapshot->lrc[i]); -} - -/** - * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. - * @snapshot: GuC Submit Engine snapshot object. - * @p: drm_printer where it will be printed out. - * - * This function prints out a given GuC Submit Engine snapshot object. - */ -void -xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, - struct drm_printer *p) -{ - int i; - - if (!snapshot) - return; - - drm_printf(p, "\nGuC ID: %d\n", snapshot->guc.id); - drm_printf(p, "\tName: %s\n", snapshot->name); - drm_printf(p, "\tClass: %d\n", snapshot->class); - drm_printf(p, "\tLogical mask: 0x%x\n", snapshot->logical_mask); - drm_printf(p, "\tWidth: %d\n", snapshot->width); - drm_printf(p, "\tRef: %d\n", snapshot->refcount); - drm_printf(p, "\tTimeout: %ld (ms)\n", snapshot->sched_timeout); - drm_printf(p, "\tTimeslice: %u (us)\n", - snapshot->sched_props.timeslice_us); - drm_printf(p, "\tPreempt timeout: %u (us)\n", - snapshot->sched_props.preempt_timeout_us); - - for (i = 0; snapshot->lrc && i < snapshot->width; ++i) - xe_lrc_snapshot_print(snapshot->lrc[i], p); - - drm_printf(p, "\tSchedule State: 0x%x\n", snapshot->schedule_state); - drm_printf(p, "\tFlags: 0x%lx\n", snapshot->exec_queue_flags); - - if (snapshot->parallel_execution) - guc_exec_queue_wq_snapshot_print(snapshot, p); - - for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size; - i++) - drm_printf(p, "\tJob: seqno=%d, fence=%d, finished=%d\n", - snapshot->pending_list[i].seqno, - snapshot->pending_list[i].fence, - snapshot->pending_list[i].finished); -} - -/** - * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given - * snapshot. - * @snapshot: GuC Submit Engine snapshot object. - * - * This function free all the memory that needed to be allocated at capture - * time. - */ -void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) -{ - int i; - - if (!snapshot) - return; - - if (snapshot->lrc) { - for (i = 0; i < snapshot->width; i++) - xe_lrc_snapshot_free(snapshot->lrc[i]); - kfree(snapshot->lrc); - } - kfree(snapshot->pending_list); - kfree(snapshot); -} - -static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) -{ - struct xe_guc_submit_exec_queue_snapshot *snapshot; - - snapshot = xe_guc_exec_queue_snapshot_capture(q); - xe_guc_exec_queue_snapshot_print(snapshot, p); - xe_guc_exec_queue_snapshot_free(snapshot); -} - -/** - * xe_guc_submit_print - GuC Submit Print. - * @guc: GuC. - * @p: drm_printer where it will be printed out. - * - * This function capture and prints snapshots of **all** GuC Engines. - */ -void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) -{ - struct xe_exec_queue *q; - unsigned long index; - - if (!xe_device_uc_enabled(guc_to_xe(guc))) - return; - - mutex_lock(&guc->submission_state.lock); - xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) - guc_exec_queue_print(q, p); - mutex_unlock(&guc->submission_state.lock); -} diff --git a/rr-cache/fb3ea476117ea020e695c3993ec0abec377d77a9/preimage b/rr-cache/fb3ea476117ea020e695c3993ec0abec377d77a9/preimage deleted file mode 100644 index edbde99cf66b..000000000000 --- a/rr-cache/fb3ea476117ea020e695c3993ec0abec377d77a9/preimage +++ /dev/null @@ -1,540 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2023 Intel Corporation - */ - -#include "xe_gt_tlb_invalidation.h" - -#include "abi/guc_actions_abi.h" -#include "xe_device.h" -#include "xe_force_wake.h" -#include "xe_gt.h" -#include "xe_gt_printk.h" -#include "xe_guc.h" -#include "xe_guc_ct.h" -#include "xe_mmio.h" -#include "xe_pm.h" -#include "xe_sriov.h" -#include "xe_trace.h" -#include "regs/xe_guc_regs.h" - -#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS - -/* - * TLB inval depends on pending commands in the CT queue and then the real - * invalidation time. Double up the time to process full CT queue - * just to be on the safe side. - */ -static long tlb_timeout_jiffies(struct xe_gt *gt) -{ - /* this reflects what HW/GuC needs to process TLB inv request */ - const long hw_tlb_timeout = HZ / 4; - - /* this estimates actual delay caused by the CTB transport */ - long delay = xe_guc_ct_queue_proc_time_jiffies(>->uc.guc.ct); - - return hw_tlb_timeout + 2 * delay; -} - -static void -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); - - trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); - xe_gt_tlb_invalidation_fence_fini(fence); - dma_fence_signal(&fence->base); - if (!stack) - dma_fence_put(&fence->base); -} - -static void -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - list_del(&fence->link); - __invalidation_fence_signal(xe, fence); -} - -static void xe_gt_tlb_fence_timeout(struct work_struct *work) -{ - struct xe_gt *gt = container_of(work, struct xe_gt, - tlb_invalidation.fence_tdr.work); - struct xe_device *xe = gt_to_xe(gt); - struct xe_gt_tlb_invalidation_fence *fence, *next; - - spin_lock_irq(>->tlb_invalidation.pending_lock); - list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) { - s64 since_inval_ms = ktime_ms_delta(ktime_get(), - fence->invalidation_time); - - if (msecs_to_jiffies(since_inval_ms) < tlb_timeout_jiffies(gt)) - break; - - trace_xe_gt_tlb_invalidation_fence_timeout(xe, fence); - xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", - fence->seqno, gt->tlb_invalidation.seqno_recv); - - fence->base.error = -ETIME; - invalidation_fence_signal(xe, fence); - } - if (!list_empty(>->tlb_invalidation.pending_fences)) - queue_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - tlb_timeout_jiffies(gt)); - spin_unlock_irq(>->tlb_invalidation.pending_lock); -} - -/** - * xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state - * @gt: graphics tile - * - * Initialize GT TLB invalidation state, purely software initialization, should - * be called once during driver load. - * - * Return: 0 on success, negative error code on error. - */ -int xe_gt_tlb_invalidation_init(struct xe_gt *gt) -{ - gt->tlb_invalidation.seqno = 1; - INIT_LIST_HEAD(>->tlb_invalidation.pending_fences); - spin_lock_init(>->tlb_invalidation.pending_lock); - spin_lock_init(>->tlb_invalidation.lock); - INIT_DELAYED_WORK(>->tlb_invalidation.fence_tdr, - xe_gt_tlb_fence_timeout); - - return 0; -} - -/** - * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset - * @gt: graphics tile - * - * Signal any pending invalidation fences, should be called during a GT reset - */ -void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) -{ - struct xe_gt_tlb_invalidation_fence *fence, *next; - int pending_seqno; - - /* - * CT channel is already disabled at this point. No new TLB requests can - * appear. - */ - - mutex_lock(>->uc.guc.ct.lock); - spin_lock_irq(>->tlb_invalidation.pending_lock); - cancel_delayed_work(>->tlb_invalidation.fence_tdr); - /* - * We might have various kworkers waiting for TLB flushes to complete - * which are not tracked with an explicit TLB fence, however at this - * stage that will never happen since the CT is already disabled, so - * make sure we signal them here under the assumption that we have - * completed a full GT reset. - */ - if (gt->tlb_invalidation.seqno == 1) - pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1; - else - pending_seqno = gt->tlb_invalidation.seqno - 1; - WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno); - - list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) - invalidation_fence_signal(gt_to_xe(gt), fence); - spin_unlock_irq(>->tlb_invalidation.pending_lock); - mutex_unlock(>->uc.guc.ct.lock); -} - -static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno) -{ - int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv); - - if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2)) - return false; - - if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2)) - return true; - - return seqno_recv >= seqno; -} - -static int send_tlb_invalidation(struct xe_guc *guc, - struct xe_gt_tlb_invalidation_fence *fence, - u32 *action, int len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = gt_to_xe(gt); - int seqno; - int ret; - - xe_gt_assert(gt, fence); - - /* - * XXX: The seqno algorithm relies on TLB invalidation being processed - * in order which they currently are, if that changes the algorithm will - * need to be updated. - */ - - mutex_lock(&guc->ct.lock); - seqno = gt->tlb_invalidation.seqno; - fence->seqno = seqno; - trace_xe_gt_tlb_invalidation_fence_send(xe, fence); - action[1] = seqno; - ret = xe_guc_ct_send_locked(&guc->ct, action, len, - G2H_LEN_DW_TLB_INVALIDATE, 1); - if (!ret && fence) { - spin_lock_irq(>->tlb_invalidation.pending_lock); - /* - * We haven't actually published the TLB fence as per - * pending_fences, but in theory our seqno could have already - * been written as we acquired the pending_lock. In such a case - * we can just go ahead and signal the fence here. - */ - if (tlb_invalidation_seqno_past(gt, seqno)) { - __invalidation_fence_signal(xe, fence); - } else { - fence->invalidation_time = ktime_get(); - list_add_tail(&fence->link, - >->tlb_invalidation.pending_fences); - - if (list_is_singular(>->tlb_invalidation.pending_fences)) - queue_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - tlb_timeout_jiffies(gt)); - } - spin_unlock_irq(>->tlb_invalidation.pending_lock); - } else if (ret < 0 && fence) { - __invalidation_fence_signal(xe, fence); - } - if (!ret) { - gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) % - TLB_INVALIDATION_SEQNO_MAX; - if (!gt->tlb_invalidation.seqno) - gt->tlb_invalidation.seqno = 1; - } - mutex_unlock(&guc->ct.lock); - - return ret; -} - -#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \ - XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \ - XE_GUC_TLB_INVAL_FLUSH_CACHE) - -/** - * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC - * @gt: graphics tile - * @fence: invalidation fence which will be signal on TLB invalidation - * completion - * - * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and - * caller can use the invalidation fence to wait for completion. - * - * Return: 0 on success, negative error code on error - */ -static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence) -{ - u32 action[] = { - XE_GUC_ACTION_TLB_INVALIDATION, - 0, /* seqno, replaced in send_tlb_invalidation */ - MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), - }; - - return send_tlb_invalidation(>->uc.guc, fence, action, - ARRAY_SIZE(action)); -} - -/** - * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT - * @gt: graphics tile - * - * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is - * synchronous. - * - * Return: 0 on success, negative error code on error - */ -int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); - - if (xe_guc_ct_enabled(>->uc.guc.ct) && - gt->uc.guc.submission_state.enabled) { - struct xe_gt_tlb_invalidation_fence fence; - int ret; - - xe_gt_tlb_invalidation_fence_init(gt, &fence, true); - ret = xe_gt_tlb_invalidation_guc(gt, &fence); - if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence); - return ret; - } - - xe_gt_tlb_invalidation_fence_wait(&fence); - } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { - if (IS_SRIOV_VF(xe)) - return 0; - - xe_gt_WARN_ON(gt, xe_force_wake_get(gt_to_fw(gt), XE_FW_GT)); - if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) { - xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC1, - PVC_GUC_TLB_INV_DESC1_INVALIDATE); - xe_mmio_write32(gt, PVC_GUC_TLB_INV_DESC0, - PVC_GUC_TLB_INV_DESC0_VALID); - } else { - xe_mmio_write32(gt, GUC_TLB_INV_CR, - GUC_TLB_INV_CR_INVALIDATE); - } - xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); - } - - return 0; -} - -/** - * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an - * address range - * - * @gt: graphics tile - * @fence: invalidation fence which will be signal on TLB invalidation - * completion - * @start: start address - * @end: end address - * @asid: address space id - * - * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can use - * the invalidation fence to wait for completion. - * - * Return: Negative error code on error, 0 on success - */ -int xe_gt_tlb_invalidation_range(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - u64 start, u64 end, u32 asid) -{ - struct xe_device *xe = gt_to_xe(gt); -#define MAX_TLB_INVALIDATION_LEN 7 - u32 action[MAX_TLB_INVALIDATION_LEN]; - int len = 0; - - xe_gt_assert(gt, fence); - - /* Execlists not supported */ - if (gt_to_xe(gt)->info.force_execlist) { - __invalidation_fence_signal(xe, fence); - return 0; - } - - action[len++] = XE_GUC_ACTION_TLB_INVALIDATION; - action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */ - if (!xe->info.has_range_tlb_invalidation) { - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL); - } else { - u64 orig_start = start; - u64 length = end - start; - u64 align; - - if (length < SZ_4K) - length = SZ_4K; - - /* - * We need to invalidate a higher granularity if start address - * is not aligned to length. When start is not aligned with - * length we need to find the length large enough to create an - * address mask covering the required range. - */ - align = roundup_pow_of_two(length); - start = ALIGN_DOWN(start, align); - end = ALIGN(end, align); - length = align; - while (start + length < end) { - length <<= 1; - start = ALIGN_DOWN(orig_start, length); - } - - /* - * Minimum invalidation size for a 2MB page that the hardware - * expects is 16MB - */ - if (length >= SZ_2M) { - length = max_t(u64, SZ_16M, length); - start = ALIGN_DOWN(orig_start, length); - } - - xe_gt_assert(gt, length >= SZ_4K); - xe_gt_assert(gt, is_power_of_2(length)); - xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1, - ilog2(SZ_2M) + 1))); - xe_gt_assert(gt, IS_ALIGNED(start, length)); - - action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE); - action[len++] = asid; - action[len++] = lower_32_bits(start); - action[len++] = upper_32_bits(start); - action[len++] = ilog2(length) - ilog2(SZ_4K); - } - - xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN); - - return send_tlb_invalidation(>->uc.guc, fence, action, len); -} - -/** - * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA - * @gt: graphics tile - * @fence: invalidation fence which will be signal on TLB invalidation - * completion, can be NULL - * @vma: VMA to invalidate - * - * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can use - * the invalidation fence to wait for completion. - * - * Return: Negative error code on error, 0 on success - */ -int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - struct xe_vma *vma) -{ - xe_gt_assert(gt, vma); - - return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma), - xe_vma_end(vma), - xe_vma_vm(vma)->usm.asid); -} - -/** - * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler - * @guc: guc - * @msg: message indicating TLB invalidation done - * @len: length of message - * - * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any - * invalidation fences for seqno. Algorithm for this depends on seqno being - * received in-order and asserts this assumption. - * - * Return: 0 on success, -EPROTO for malformed messages. - */ -int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) -{ - struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = gt_to_xe(gt); - struct xe_gt_tlb_invalidation_fence *fence, *next; - unsigned long flags; - - if (unlikely(len != 1)) - return -EPROTO; - - /* - * This can also be run both directly from the IRQ handler and also in - * process_g2h_msg(). Only one may process any individual CT message, - * however the order they are processed here could result in skipping a - * seqno. To handle that we just process all the seqnos from the last - * seqno_recv up to and including the one in msg[0]. The delta should be - * very small so there shouldn't be much of pending_fences we actually - * need to iterate over here. - * - * From GuC POV we expect the seqnos to always appear in-order, so if we - * see something later in the timeline we can be sure that anything - * appearing earlier has already signalled, just that we have yet to - * officially process the CT message like if racing against - * process_g2h_msg(). - */ - spin_lock_irqsave(>->tlb_invalidation.pending_lock, flags); - if (tlb_invalidation_seqno_past(gt, msg[0])) { - spin_unlock_irqrestore(>->tlb_invalidation.pending_lock, flags); - return 0; - } - - WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); - - list_for_each_entry_safe(fence, next, - >->tlb_invalidation.pending_fences, link) { - trace_xe_gt_tlb_invalidation_fence_recv(xe, fence); - - if (!tlb_invalidation_seqno_past(gt, fence->seqno)) - break; - - invalidation_fence_signal(xe, fence); - } - - if (!list_empty(>->tlb_invalidation.pending_fences)) - mod_delayed_work(system_wq, - >->tlb_invalidation.fence_tdr, - tlb_timeout_jiffies(gt)); - else - cancel_delayed_work(>->tlb_invalidation.fence_tdr); - - spin_unlock_irqrestore(>->tlb_invalidation.pending_lock, flags); - - return 0; -} - -static const char * -invalidation_fence_get_driver_name(struct dma_fence *dma_fence) -{ - return "xe"; -} - -static const char * -invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) -{ - return "invalidation_fence"; -} - -static const struct dma_fence_ops invalidation_fence_ops = { - .get_driver_name = invalidation_fence_get_driver_name, - .get_timeline_name = invalidation_fence_get_timeline_name, -}; - -/** - * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence - * @gt: GT - * @fence: TLB invalidation fence to initialize -<<<<<<< - * - * Initialize TLB invalidation fence for use - */ -void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence) -{ -======= - * @stack: fence is stack variable - * - * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini - * must be called if fence is not signaled. - */ -void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, - struct xe_gt_tlb_invalidation_fence *fence, - bool stack) -{ - xe_pm_runtime_get_noresume(gt_to_xe(gt)); - ->>>>>>> - spin_lock_irq(>->tlb_invalidation.lock); - dma_fence_init(&fence->base, &invalidation_fence_ops, - >->tlb_invalidation.lock, - dma_fence_context_alloc(1), 1); - spin_unlock_irq(>->tlb_invalidation.lock); - INIT_LIST_HEAD(&fence->link); -<<<<<<< - dma_fence_get(&fence->base); -======= - if (stack) - set_bit(FENCE_STACK_BIT, &fence->base.flags); - else - dma_fence_get(&fence->base); - fence->gt = gt; -} - -/** - * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence - * @fence: TLB invalidation fence to finalize - * - * Drop PM ref which fence took durinig init. - */ -void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) -{ - xe_pm_runtime_put(gt_to_xe(fence->gt)); ->>>>>>> -} |