diff options
Diffstat (limited to 'drivers/accel/ivpu')
25 files changed, 1326 insertions, 900 deletions
diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig index 1a4c4ed9d113..682c53245286 100644 --- a/drivers/accel/ivpu/Kconfig +++ b/drivers/accel/ivpu/Kconfig @@ -1,16 +1,17 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_ACCEL_IVPU - tristate "Intel VPU for Meteor Lake and newer" + tristate "Intel NPU (Neural Processing Unit)" depends on DRM_ACCEL depends on X86_64 && !UML depends on PCI && PCI_MSI select FW_LOADER - select SHMEM + select DRM_GEM_SHMEM_HELPER select GENERIC_ALLOCATOR help - Choose this option if you have a system that has an 14th generation Intel CPU - or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated - inference accelerator for Computer Vision and Deep Learning applications. + Choose this option if you have a system with an 14th generation + Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU) + is a CPU-integrated inference accelerator for Computer Vision + and Deep Learning applications. If "M" is selected, the module will be called intel_vpu. diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c index ea453b985b49..19035230563d 100644 --- a/drivers/accel/ivpu/ivpu_debugfs.c +++ b/drivers/accel/ivpu/ivpu_debugfs.c @@ -14,6 +14,7 @@ #include "ivpu_fw.h" #include "ivpu_fw_log.h" #include "ivpu_gem.h" +#include "ivpu_hw.h" #include "ivpu_jsm_msg.h" #include "ivpu_pm.h" @@ -115,6 +116,31 @@ static const struct drm_debugfs_info vdev_debugfs_list[] = { {"reset_pending", reset_pending_show, 0}, }; +static ssize_t +dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) +{ + struct ivpu_device *vdev = file->private_data; + struct ivpu_fw_info *fw = vdev->fw; + u32 dvfs_mode; + int ret; + + ret = kstrtou32_from_user(user_buf, size, 0, &dvfs_mode); + if (ret < 0) + return ret; + + fw->dvfs_mode = dvfs_mode; + + ivpu_pm_schedule_recovery(vdev); + + return size; +} + +static const struct file_operations dvfs_mode_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = dvfs_mode_fops_write, +}; + static int fw_log_show(struct seq_file *s, void *v) { struct ivpu_device *vdev = s->private; @@ -152,6 +178,30 @@ static const struct file_operations fw_log_fops = { }; static ssize_t +fw_profiling_freq_fops_write(struct file *file, const char __user *user_buf, + size_t size, loff_t *pos) +{ + struct ivpu_device *vdev = file->private_data; + bool enable; + int ret; + + ret = kstrtobool_from_user(user_buf, size, &enable); + if (ret < 0) + return ret; + + ivpu_hw_profiling_freq_drive(vdev, enable); + ivpu_pm_schedule_recovery(vdev); + + return size; +} + +static const struct file_operations fw_profiling_freq_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = fw_profiling_freq_fops_write, +}; + +static ssize_t fw_trace_destination_mask_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos) { @@ -280,6 +330,9 @@ void ivpu_debugfs_init(struct ivpu_device *vdev) debugfs_create_file("force_recovery", 0200, debugfs_root, vdev, &ivpu_force_recovery_fops); + debugfs_create_file("dvfs_mode", 0200, debugfs_root, vdev, + &dvfs_mode_fops); + debugfs_create_file("fw_log", 0644, debugfs_root, vdev, &fw_log_fops); debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev, @@ -291,4 +344,8 @@ void ivpu_debugfs_init(struct ivpu_device *vdev) debugfs_create_file("reset_engine", 0200, debugfs_root, vdev, &ivpu_reset_engine_fops); + + if (ivpu_hw_gen(vdev) >= IVPU_HW_40XX) + debugfs_create_file("fw_profiling_freq_drive", 0200, + debugfs_root, vdev, &fw_profiling_freq_fops); } diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 790603017653..64927682161b 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -31,8 +31,6 @@ __stringify(DRM_IVPU_DRIVER_MINOR) "." #endif -static const struct drm_driver driver; - static struct lock_class_key submitted_jobs_xa_lock_class_key; int ivpu_dbg_mask; @@ -41,7 +39,7 @@ MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros."); int ivpu_test_mode; module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644); -MODULE_PARM_DESC(test_mode, "Test mode: 0 - normal operation, 1 - fw unit test, 2 - null hw"); +MODULE_PARM_DESC(test_mode, "Test mode mask. See IVPU_TEST_MODE_* macros."); u8 ivpu_pll_min_ratio; module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644); @@ -93,8 +91,8 @@ static void file_priv_release(struct kref *ref) ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id); ivpu_cmdq_release_all(file_priv); - ivpu_bo_remove_all_bos_from_context(&file_priv->ctx); ivpu_jsm_context_release(vdev, file_priv->ctx.id); + ivpu_bo_remove_all_bos_from_context(vdev, &file_priv->ctx); ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); mutex_destroy(&file_priv->lock); @@ -317,16 +315,14 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev) unsigned long timeout; int ret; - if (ivpu_test_mode == IVPU_TEST_MODE_FW_TEST) + if (ivpu_test_mode & IVPU_TEST_MODE_FW_TEST) return 0; - ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG); + ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG, NULL); timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot); while (1) { - ret = ivpu_ipc_irq_handler(vdev); - if (ret) - break; + ivpu_ipc_irq_handler(vdev, NULL); ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0); if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout)) break; @@ -362,7 +358,7 @@ int ivpu_boot(struct ivpu_device *vdev) int ret; /* Update boot params located at first 4KB of FW memory */ - ivpu_fw_boot_params_setup(vdev, vdev->fw->mem->kvaddr); + ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem)); ret = ivpu_hw_boot_fw(vdev); if (ret) { @@ -414,7 +410,9 @@ static const struct drm_driver driver = { .open = ivpu_open, .postclose = ivpu_postclose, - .gem_prime_import = ivpu_gem_prime_import, + + .gem_create_object = ivpu_gem_create_object, + .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, .ioctls = ivpu_drm_ioctls, .num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls), @@ -427,6 +425,13 @@ static const struct drm_driver driver = { .minor = DRM_IVPU_DRIVER_MINOR, }; +static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg) +{ + struct ivpu_device *vdev = arg; + + return ivpu_ipc_irq_thread_handler(vdev); +} + static int ivpu_irq_init(struct ivpu_device *vdev) { struct pci_dev *pdev = to_pci_dev(vdev->drm.dev); @@ -440,8 +445,8 @@ static int ivpu_irq_init(struct ivpu_device *vdev) vdev->irq = pci_irq_vector(pdev, 0); - ret = devm_request_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler, - IRQF_NO_AUTOEN, DRIVER_NAME, vdev); + ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler, + ivpu_irq_thread_handler, IRQF_NO_AUTOEN, DRIVER_NAME, vdev); if (ret) ivpu_err(vdev, "Failed to request an IRQ %d\n", ret); @@ -533,6 +538,11 @@ static int ivpu_dev_init(struct ivpu_device *vdev) xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); + INIT_LIST_HEAD(&vdev->bo_list); + + ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock); + if (ret) + goto err_xa_destroy; ret = ivpu_pci_init(vdev); if (ret) @@ -550,7 +560,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev) /* Power up early so the rest of init code can access VPU registers */ ret = ivpu_hw_power_up(vdev); if (ret) - goto err_xa_destroy; + goto err_power_down; ret = ivpu_mmu_global_context_init(vdev); if (ret) @@ -574,20 +584,15 @@ static int ivpu_dev_init(struct ivpu_device *vdev) ivpu_pm_init(vdev); - ret = ivpu_job_done_thread_init(vdev); - if (ret) - goto err_ipc_fini; - ret = ivpu_boot(vdev); if (ret) - goto err_job_done_thread_fini; + goto err_ipc_fini; + ivpu_job_done_consumer_init(vdev); ivpu_pm_enable(vdev); return 0; -err_job_done_thread_fini: - ivpu_job_done_thread_fini(vdev); err_ipc_fini: ivpu_ipc_fini(vdev); err_fw_fini: @@ -612,7 +617,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev) ivpu_shutdown(vdev); if (IVPU_WA(d3hot_after_power_off)) pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot); - ivpu_job_done_thread_fini(vdev); + ivpu_job_done_consumer_fini(vdev); ivpu_pm_cancel_recovery(vdev); ivpu_ipc_fini(vdev); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 417ddeca8517..ebc4b84f27b2 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -17,9 +17,10 @@ #include <uapi/drm/ivpu_accel.h> #include "ivpu_mmu_context.h" +#include "ivpu_ipc.h" #define DRIVER_NAME "intel_vpu" -#define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)" +#define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)" #define DRIVER_DATE "20230117" #define PCI_DEVICE_ID_MTL 0x7d1d @@ -88,6 +89,7 @@ struct ivpu_wa_table { bool d3hot_after_power_off; bool interrupt_clear_with_0; bool disable_clock_relinquish; + bool disable_d0i3_msg; }; struct ivpu_hw_info; @@ -115,8 +117,11 @@ struct ivpu_device { struct xarray context_xa; struct xa_limit context_xa_limit; + struct mutex bo_list_lock; /* Protects bo_list */ + struct list_head bo_list; + struct xarray submitted_jobs_xa; - struct task_struct *job_done_thread; + struct ivpu_ipc_consumer job_done_consumer; atomic64_t unique_id_counter; @@ -126,6 +131,7 @@ struct ivpu_device { int tdr; int reschedule_suspend; int autosuspend; + int d0i3_entry_msg; } timeout; }; @@ -148,9 +154,11 @@ extern u8 ivpu_pll_min_ratio; extern u8 ivpu_pll_max_ratio; extern bool ivpu_disable_mmu_cont_pages; -#define IVPU_TEST_MODE_DISABLED 0 -#define IVPU_TEST_MODE_FW_TEST 1 -#define IVPU_TEST_MODE_NULL_HW 2 +#define IVPU_TEST_MODE_FW_TEST BIT(0) +#define IVPU_TEST_MODE_NULL_HW BIT(1) +#define IVPU_TEST_MODE_NULL_SUBMISSION BIT(2) +#define IVPU_TEST_MODE_D0I3_MSG_DISABLE BIT(4) +#define IVPU_TEST_MODE_D0I3_MSG_ENABLE BIT(5) extern int ivpu_test_mode; struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv); diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c index 691da521dde5..6576232f3e67 100644 --- a/drivers/accel/ivpu/ivpu_fw.c +++ b/drivers/accel/ivpu/ivpu_fw.c @@ -33,12 +33,17 @@ #define ADDR_TO_L2_CACHE_CFG(addr) ((addr) >> 31) -#define IVPU_FW_CHECK_API(vdev, fw_hdr, name, min_major) \ +/* Check if FW API is compatible with the driver */ +#define IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, name, min_major) \ ivpu_fw_check_api(vdev, fw_hdr, #name, \ VPU_##name##_API_VER_INDEX, \ VPU_##name##_API_VER_MAJOR, \ VPU_##name##_API_VER_MINOR, min_major) +/* Check if API version is lower that the given version */ +#define IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, name, major, minor) \ + ivpu_fw_check_api_ver_lt(vdev, fw_hdr, #name, VPU_##name##_API_VER_INDEX, major, minor) + static char *ivpu_firmware; module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644); MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/.."); @@ -105,6 +110,19 @@ ivpu_fw_check_api(struct ivpu_device *vdev, const struct vpu_firmware_header *fw return 0; } +static bool +ivpu_fw_check_api_ver_lt(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr, + const char *str, int index, u16 major, u16 minor) +{ + u16 fw_major = (u16)(fw_hdr->api_version[index] >> 16); + u16 fw_minor = (u16)(fw_hdr->api_version[index]); + + if (fw_major < major || (fw_major == major && fw_minor < minor)) + return true; + + return false; +} + static int ivpu_fw_parse(struct ivpu_device *vdev) { struct ivpu_fw_info *fw = vdev->fw; @@ -164,9 +182,9 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) ivpu_info(vdev, "Firmware: %s, version: %s", fw->name, (const char *)fw_hdr + VPU_FW_HEADER_SIZE); - if (IVPU_FW_CHECK_API(vdev, fw_hdr, BOOT, 3)) + if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3)) return -EINVAL; - if (IVPU_FW_CHECK_API(vdev, fw_hdr, JSM, 3)) + if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, JSM, 3)) return -EINVAL; fw->runtime_addr = runtime_addr; @@ -182,6 +200,8 @@ static int ivpu_fw_parse(struct ivpu_device *vdev) fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING; fw->trace_hw_component_mask = -1; + fw->dvfs_mode = 0; + ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n", fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size); ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n", @@ -195,6 +215,24 @@ static void ivpu_fw_release(struct ivpu_device *vdev) release_firmware(vdev->fw->file); } +/* Initialize workarounds that depend on FW version */ +static void +ivpu_fw_init_wa(struct ivpu_device *vdev) +{ + const struct vpu_firmware_header *fw_hdr = (const void *)vdev->fw->file->data; + + if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, BOOT, 3, 17) || + (ivpu_hw_gen(vdev) > IVPU_HW_37XX) || + (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_DISABLE)) + vdev->wa.disable_d0i3_msg = true; + + /* Force enable the feature for testing purposes */ + if (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_ENABLE) + vdev->wa.disable_d0i3_msg = false; + + IVPU_PRINT_WA(disable_d0i3_msg); +} + static int ivpu_fw_update_global_range(struct ivpu_device *vdev) { struct ivpu_fw_info *fw = vdev->fw; @@ -248,7 +286,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev) if (fw->shave_nn_size) { fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.shave.start, - fw->shave_nn_size, DRM_IVPU_BO_UNCACHED); + fw->shave_nn_size, DRM_IVPU_BO_WC); if (!fw->mem_shave_nn) { ivpu_err(vdev, "Failed to allocate shavenn buffer\n"); ret = -ENOMEM; @@ -297,6 +335,8 @@ int ivpu_fw_init(struct ivpu_device *vdev) if (ret) goto err_fw_release; + ivpu_fw_init_wa(vdev); + ret = ivpu_fw_mem_init(vdev); if (ret) goto err_fw_release; @@ -422,14 +462,31 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_ boot_params->punit_telemetry_sram_size); ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n", boot_params->vpu_telemetry_enable); + ivpu_dbg(vdev, FW_BOOT, "boot_params.dvfs_mode = %u\n", + boot_params->dvfs_mode); + ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_delayed_entry = %d\n", + boot_params->d0i3_delayed_entry); + ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n", + boot_params->d0i3_residency_time_us); + ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n", + boot_params->d0i3_entry_vpu_ts); } void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params) { struct ivpu_bo *ipc_mem_rx = vdev->ipc->mem_rx; - /* In case of warm boot we only have to reset the entrypoint addr */ + /* In case of warm boot only update variable params */ if (!ivpu_fw_is_cold_boot(vdev)) { + boot_params->d0i3_residency_time_us = + ktime_us_delta(ktime_get_boottime(), vdev->hw->d0i3_entry_host_ts); + boot_params->d0i3_entry_vpu_ts = vdev->hw->d0i3_entry_vpu_ts; + + ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n", + boot_params->d0i3_residency_time_us); + ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n", + boot_params->d0i3_entry_vpu_ts); + boot_params->save_restore_ret_address = 0; vdev->pm->is_warmboot = true; wmb(); /* Flush WC buffers after writing save_restore_ret_address */ @@ -443,6 +500,13 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev); /* + * This param is a debug firmware feature. It switches default clock + * to higher resolution one for fine-grained and more accurate firmware + * task profiling. + */ + boot_params->perf_clk_frequency = ivpu_hw_profiling_freq_get(vdev); + + /* * Uncached region of VPU address space, covers IPC buffers, job queues * and log buffers, programmable to L2$ Uncached by VPU MTRR */ @@ -493,6 +557,11 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev); boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev); boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev); + boot_params->dvfs_mode = vdev->fw->dvfs_mode; + if (!IVPU_WA(disable_d0i3_msg)) + boot_params->d0i3_delayed_entry = 1; + boot_params->d0i3_residency_time_us = 0; + boot_params->d0i3_entry_vpu_ts = 0; wmb(); /* Flush WC buffers after writing bootparams */ diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h index 10ae2847f0ef..66b60fa161b5 100644 --- a/drivers/accel/ivpu/ivpu_fw.h +++ b/drivers/accel/ivpu/ivpu_fw.h @@ -27,6 +27,7 @@ struct ivpu_fw_info { u32 trace_level; u32 trace_destination_mask; u64 trace_hw_component_mask; + u32 dvfs_mode; }; int ivpu_fw_init(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index c91852f2edc8..1dda4f38ea25 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -20,215 +20,18 @@ #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" -MODULE_IMPORT_NS(DMA_BUF); - static const struct drm_gem_object_funcs ivpu_gem_funcs; -static struct lock_class_key prime_bo_lock_class_key; - -static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo) -{ - /* Pages are managed by the underlying dma-buf */ - return 0; -} - -static void prime_free_pages_locked(struct ivpu_bo *bo) -{ - /* Pages are managed by the underlying dma-buf */ -} - -static int prime_map_pages_locked(struct ivpu_bo *bo) -{ - struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - struct sg_table *sgt; - - sgt = dma_buf_map_attachment_unlocked(bo->base.import_attach, DMA_BIDIRECTIONAL); - if (IS_ERR(sgt)) { - ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt)); - return PTR_ERR(sgt); - } - - bo->sgt = sgt; - return 0; -} - -static void prime_unmap_pages_locked(struct ivpu_bo *bo) -{ - dma_buf_unmap_attachment_unlocked(bo->base.import_attach, bo->sgt, DMA_BIDIRECTIONAL); - bo->sgt = NULL; -} - -static const struct ivpu_bo_ops prime_ops = { - .type = IVPU_BO_TYPE_PRIME, - .name = "prime", - .alloc_pages = prime_alloc_pages_locked, - .free_pages = prime_free_pages_locked, - .map_pages = prime_map_pages_locked, - .unmap_pages = prime_unmap_pages_locked, -}; - -static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo) -{ - int npages = ivpu_bo_size(bo) >> PAGE_SHIFT; - struct page **pages; - - pages = drm_gem_get_pages(&bo->base); - if (IS_ERR(pages)) - return PTR_ERR(pages); - - if (bo->flags & DRM_IVPU_BO_WC) - set_pages_array_wc(pages, npages); - else if (bo->flags & DRM_IVPU_BO_UNCACHED) - set_pages_array_uc(pages, npages); - - bo->pages = pages; - return 0; -} - -static void shmem_free_pages_locked(struct ivpu_bo *bo) -{ - if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) - set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT); - - drm_gem_put_pages(&bo->base, bo->pages, true, false); - bo->pages = NULL; -} - -static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo) -{ - int npages = ivpu_bo_size(bo) >> PAGE_SHIFT; - struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - struct sg_table *sgt; - int ret; - - sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages); - if (IS_ERR(sgt)) { - ivpu_err(vdev, "Failed to allocate sgtable\n"); - return PTR_ERR(sgt); - } - - ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0); - if (ret) { - ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret); - goto err_free_sgt; - } - - bo->sgt = sgt; - return 0; - -err_free_sgt: - kfree(sgt); - return ret; -} - -static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo) -{ - struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - - dma_unmap_sgtable(vdev->drm.dev, bo->sgt, DMA_BIDIRECTIONAL, 0); - sg_free_table(bo->sgt); - kfree(bo->sgt); - bo->sgt = NULL; -} - -static const struct ivpu_bo_ops shmem_ops = { - .type = IVPU_BO_TYPE_SHMEM, - .name = "shmem", - .alloc_pages = shmem_alloc_pages_locked, - .free_pages = shmem_free_pages_locked, - .map_pages = ivpu_bo_map_pages_locked, - .unmap_pages = ivpu_bo_unmap_pages_locked, -}; - -static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo) -{ - unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT; - struct page **pages; - int ret; - - pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; - - for (i = 0; i < npages; i++) { - pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); - if (!pages[i]) { - ret = -ENOMEM; - goto err_free_pages; - } - cond_resched(); - } - - bo->pages = pages; - return 0; - -err_free_pages: - while (i--) - put_page(pages[i]); - kvfree(pages); - return ret; -} - -static void internal_free_pages_locked(struct ivpu_bo *bo) -{ - unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT; - - if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) - set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT); - - for (i = 0; i < npages; i++) - put_page(bo->pages[i]); - - kvfree(bo->pages); - bo->pages = NULL; -} - -static const struct ivpu_bo_ops internal_ops = { - .type = IVPU_BO_TYPE_INTERNAL, - .name = "internal", - .alloc_pages = internal_alloc_pages_locked, - .free_pages = internal_free_pages_locked, - .map_pages = ivpu_bo_map_pages_locked, - .unmap_pages = ivpu_bo_unmap_pages_locked, -}; - -static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo) -{ - struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - int ret; - - lockdep_assert_held(&bo->lock); - drm_WARN_ON(&vdev->drm, bo->sgt); - - ret = bo->ops->alloc_pages(bo); - if (ret) { - ivpu_err(vdev, "Failed to allocate pages for BO: %d", ret); - return ret; - } - - ret = bo->ops->map_pages(bo); - if (ret) { - ivpu_err(vdev, "Failed to map pages for BO: %d", ret); - goto err_free_pages; - } - return ret; - -err_free_pages: - bo->ops->free_pages(bo); - return ret; -} - -static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo) +static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action) { - mutex_lock(&bo->lock); - - WARN_ON(!bo->sgt); - bo->ops->unmap_pages(bo); - WARN_ON(bo->sgt); - bo->ops->free_pages(bo); - WARN_ON(bo->pages); - - mutex_unlock(&bo->lock); + if (bo->ctx) + ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u ctx %d vpu_addr 0x%llx mmu_mapped %d\n", + action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt, + bo->handle, bo->ctx->id, bo->vpu_addr, bo->mmu_mapped); + else + ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u (not added to context)\n", + action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt, + bo->handle); } /* @@ -245,21 +48,24 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo) mutex_lock(&bo->lock); - if (!bo->vpu_addr) { - ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n", - bo->ctx->id, bo->handle); + ivpu_dbg_bo(vdev, bo, "pin"); + + if (!bo->ctx) { + ivpu_err(vdev, "vpu_addr not allocated for BO %d\n", bo->handle); ret = -EINVAL; goto unlock; } - if (!bo->sgt) { - ret = ivpu_bo_alloc_and_map_pages_locked(bo); - if (ret) + if (!bo->mmu_mapped) { + struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base); + + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret); goto unlock; - } + } - if (!bo->mmu_mapped) { - ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt, + ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt, ivpu_bo_is_snooped(bo)); if (ret) { ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret); @@ -281,248 +87,213 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx, struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); int ret; - if (!range) { - if (bo->flags & DRM_IVPU_BO_SHAVE_MEM) - range = &vdev->hw->ranges.shave; - else if (bo->flags & DRM_IVPU_BO_DMA_MEM) - range = &vdev->hw->ranges.dma; - else - range = &vdev->hw->ranges.user; - } + mutex_lock(&bo->lock); - mutex_lock(&ctx->lock); - ret = ivpu_mmu_context_insert_node_locked(ctx, range, ivpu_bo_size(bo), &bo->mm_node); + ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node); if (!ret) { bo->ctx = ctx; bo->vpu_addr = bo->mm_node.start; - list_add_tail(&bo->ctx_node, &ctx->bo_list); + } else { + ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret); } - mutex_unlock(&ctx->lock); + + ivpu_dbg_bo(vdev, bo, "alloc"); + + mutex_unlock(&bo->lock); return ret; } -static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo) +static void ivpu_bo_unbind_locked(struct ivpu_bo *bo) { struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - struct ivpu_mmu_context *ctx = bo->ctx; - ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n", - ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped); + lockdep_assert_held(&bo->lock); - mutex_lock(&bo->lock); + ivpu_dbg_bo(vdev, bo, "unbind"); + + /* TODO: dma_unmap */ if (bo->mmu_mapped) { - drm_WARN_ON(&vdev->drm, !bo->sgt); - ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt); + drm_WARN_ON(&vdev->drm, !bo->ctx); + drm_WARN_ON(&vdev->drm, !bo->vpu_addr); + drm_WARN_ON(&vdev->drm, !bo->base.sgt); + ivpu_mmu_context_unmap_sgt(vdev, bo->ctx, bo->vpu_addr, bo->base.sgt); bo->mmu_mapped = false; } - mutex_lock(&ctx->lock); - list_del(&bo->ctx_node); - bo->vpu_addr = 0; - bo->ctx = NULL; - ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node); - mutex_unlock(&ctx->lock); + if (bo->ctx) { + ivpu_mmu_context_remove_node(bo->ctx, &bo->mm_node); + bo->vpu_addr = 0; + bo->ctx = NULL; + } +} +static void ivpu_bo_unbind(struct ivpu_bo *bo) +{ + mutex_lock(&bo->lock); + ivpu_bo_unbind_locked(bo); mutex_unlock(&bo->lock); } -void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx) +void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx) { - struct ivpu_bo *bo, *tmp; + struct ivpu_bo *bo; + + if (drm_WARN_ON(&vdev->drm, !ctx)) + return; - list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node) - ivpu_bo_free_vpu_addr(bo); + mutex_lock(&vdev->bo_list_lock); + list_for_each_entry(bo, &vdev->bo_list, bo_list_node) { + mutex_lock(&bo->lock); + if (bo->ctx == ctx) + ivpu_bo_unbind_locked(bo); + mutex_unlock(&bo->lock); + } + mutex_unlock(&vdev->bo_list_lock); } -static struct ivpu_bo * -ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context, - u64 size, u32 flags, const struct ivpu_bo_ops *ops, - const struct ivpu_addr_range *range, u64 user_ptr) +struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size) { struct ivpu_bo *bo; - int ret = 0; - - if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size))) - return ERR_PTR(-EINVAL); - switch (flags & DRM_IVPU_BO_CACHE_MASK) { - case DRM_IVPU_BO_CACHED: - case DRM_IVPU_BO_UNCACHED: - case DRM_IVPU_BO_WC: - break; - default: + if (size == 0 || !PAGE_ALIGNED(size)) return ERR_PTR(-EINVAL); - } bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (!bo) return ERR_PTR(-ENOMEM); - mutex_init(&bo->lock); - bo->base.funcs = &ivpu_gem_funcs; - bo->flags = flags; - bo->ops = ops; - bo->user_ptr = user_ptr; - - if (ops->type == IVPU_BO_TYPE_SHMEM) - ret = drm_gem_object_init(&vdev->drm, &bo->base, size); - else - drm_gem_private_object_init(&vdev->drm, &bo->base, size); - - if (ret) { - ivpu_err(vdev, "Failed to initialize drm object\n"); - goto err_free; - } - - if (flags & DRM_IVPU_BO_MAPPABLE) { - ret = drm_gem_create_mmap_offset(&bo->base); - if (ret) { - ivpu_err(vdev, "Failed to allocate mmap offset\n"); - goto err_release; - } - } - - if (mmu_context) { - ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range); - if (ret) { - ivpu_err(vdev, "Failed to add BO to context: %d\n", ret); - goto err_release; - } - } + bo->base.base.funcs = &ivpu_gem_funcs; + bo->base.pages_mark_dirty_on_put = true; /* VPU can dirty a BO anytime */ - return bo; + INIT_LIST_HEAD(&bo->bo_list_node); + mutex_init(&bo->lock); -err_release: - drm_gem_object_release(&bo->base); -err_free: - kfree(bo); - return ERR_PTR(ret); + return &bo->base.base; } -static void ivpu_bo_free(struct drm_gem_object *obj) +static struct ivpu_bo * +ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags) { - struct ivpu_bo *bo = to_ivpu_bo(obj); - struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - - if (bo->ctx) - ivpu_dbg(vdev, BO, "free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n", - bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped); - else - ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n", - (bool)bo->sgt, bo->mmu_mapped); - - drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)); + struct drm_gem_shmem_object *shmem; + struct ivpu_bo *bo; - vunmap(bo->kvaddr); + switch (flags & DRM_IVPU_BO_CACHE_MASK) { + case DRM_IVPU_BO_CACHED: + case DRM_IVPU_BO_WC: + break; + default: + return ERR_PTR(-EINVAL); + } - if (bo->ctx) - ivpu_bo_free_vpu_addr(bo); + shmem = drm_gem_shmem_create(&vdev->drm, size); + if (IS_ERR(shmem)) + return ERR_CAST(shmem); - if (bo->sgt) - ivpu_bo_unmap_and_free_pages(bo); + bo = to_ivpu_bo(&shmem->base); + bo->base.map_wc = flags & DRM_IVPU_BO_WC; + bo->flags = flags; - if (bo->base.import_attach) - drm_prime_gem_destroy(&bo->base, bo->sgt); + mutex_lock(&vdev->bo_list_lock); + list_add_tail(&bo->bo_list_node, &vdev->bo_list); + mutex_unlock(&vdev->bo_list_lock); - drm_gem_object_release(&bo->base); + ivpu_dbg(vdev, BO, "create: vpu_addr 0x%llx size %zu flags 0x%x\n", + bo->vpu_addr, bo->base.base.size, flags); - mutex_destroy(&bo->lock); - kfree(bo); + return bo; } -static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file) { + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; struct ivpu_bo *bo = to_ivpu_bo(obj); - struct ivpu_device *vdev = ivpu_bo_to_vdev(bo); - - ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s", - bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), bo->ops->name); + struct ivpu_addr_range *range; - if (obj->import_attach) { - /* Drop the reference drm_gem_mmap_obj() acquired.*/ - drm_gem_object_put(obj); - vma->vm_private_data = NULL; - return dma_buf_mmap(obj->dma_buf, vma, 0); - } - - vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND); - vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags)); + if (bo->flags & DRM_IVPU_BO_SHAVE_MEM) + range = &vdev->hw->ranges.shave; + else if (bo->flags & DRM_IVPU_BO_DMA_MEM) + range = &vdev->hw->ranges.dma; + else + range = &vdev->hw->ranges.user; - return 0; + return ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, range); } -static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj) +static void ivpu_bo_free(struct drm_gem_object *obj) { + struct ivpu_device *vdev = to_ivpu_device(obj->dev); struct ivpu_bo *bo = to_ivpu_bo(obj); - loff_t npages = obj->size >> PAGE_SHIFT; - int ret = 0; - mutex_lock(&bo->lock); + mutex_lock(&vdev->bo_list_lock); + list_del(&bo->bo_list_node); + mutex_unlock(&vdev->bo_list_lock); - if (!bo->sgt) - ret = ivpu_bo_alloc_and_map_pages_locked(bo); + drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ)); - mutex_unlock(&bo->lock); + ivpu_dbg_bo(vdev, bo, "free"); - if (ret) - return ERR_PTR(ret); + ivpu_bo_unbind(bo); + mutex_destroy(&bo->lock); - return drm_prime_pages_to_sg(obj->dev, bo->pages, npages); + drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1); + drm_gem_shmem_free(&bo->base); } -static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - struct drm_gem_object *obj = vma->vm_private_data; - struct ivpu_bo *bo = to_ivpu_bo(obj); - loff_t npages = obj->size >> PAGE_SHIFT; - pgoff_t page_offset; - struct page *page; - vm_fault_t ret; - int err; - - mutex_lock(&bo->lock); - - if (!bo->sgt) { - err = ivpu_bo_alloc_and_map_pages_locked(bo); - if (err) { - ret = vmf_error(err); - goto unlock; - } - } - - /* We don't use vmf->pgoff since that has the fake offset */ - page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; - if (page_offset >= npages) { - ret = VM_FAULT_SIGBUS; - } else { - page = bo->pages[page_offset]; - ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page)); - } - -unlock: - mutex_unlock(&bo->lock); +static const struct dma_buf_ops ivpu_bo_dmabuf_ops = { + .cache_sgt_mapping = true, + .attach = drm_gem_map_attach, + .detach = drm_gem_map_detach, + .map_dma_buf = drm_gem_map_dma_buf, + .unmap_dma_buf = drm_gem_unmap_dma_buf, + .release = drm_gem_dmabuf_release, + .mmap = drm_gem_dmabuf_mmap, + .vmap = drm_gem_dmabuf_vmap, + .vunmap = drm_gem_dmabuf_vunmap, +}; - return ret; +static struct dma_buf *ivpu_bo_export(struct drm_gem_object *obj, int flags) +{ + struct drm_device *dev = obj->dev; + struct dma_buf_export_info exp_info = { + .exp_name = KBUILD_MODNAME, + .owner = dev->driver->fops->owner, + .ops = &ivpu_bo_dmabuf_ops, + .size = obj->size, + .flags = flags, + .priv = obj, + .resv = obj->resv, + }; + void *sgt; + + /* + * Make sure that pages are allocated and dma-mapped before exporting the bo. + * DMA-mapping is required if the bo will be imported to the same device. + */ + sgt = drm_gem_shmem_get_pages_sgt(to_drm_gem_shmem_obj(obj)); + if (IS_ERR(sgt)) + return sgt; + + return drm_gem_dmabuf_export(dev, &exp_info); } -static const struct vm_operations_struct ivpu_vm_ops = { - .fault = ivpu_vm_fault, - .open = drm_gem_vm_open, - .close = drm_gem_vm_close, -}; - static const struct drm_gem_object_funcs ivpu_gem_funcs = { .free = ivpu_bo_free, - .mmap = ivpu_bo_mmap, - .vm_ops = &ivpu_vm_ops, - .get_sg_table = ivpu_bo_get_sg_table, + .open = ivpu_bo_open, + .export = ivpu_bo_export, + .print_info = drm_gem_shmem_object_print_info, + .pin = drm_gem_shmem_object_pin, + .unpin = drm_gem_shmem_object_unpin, + .get_sg_table = drm_gem_shmem_object_get_sg_table, + .vmap = drm_gem_shmem_object_vmap, + .vunmap = drm_gem_shmem_object_vunmap, + .mmap = drm_gem_shmem_object_mmap, + .vm_ops = &drm_gem_shmem_vm_ops, }; -int -ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct ivpu_file_priv *file_priv = file->driver_priv; struct ivpu_device *vdev = file_priv->vdev; @@ -537,23 +308,20 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (size == 0) return -EINVAL; - bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0); + bo = ivpu_bo_create(vdev, size, args->flags); if (IS_ERR(bo)) { ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)", bo, file_priv->ctx.id, args->size, args->flags); return PTR_ERR(bo); } - ret = drm_gem_handle_create(file, &bo->base, &bo->handle); + ret = drm_gem_handle_create(file, &bo->base.base, &bo->handle); if (!ret) { args->vpu_addr = bo->vpu_addr; args->handle = bo->handle; } - drm_gem_object_put(&bo->base); - - ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n", - file_priv->ctx.id, bo->vpu_addr, ivpu_bo_size(bo), bo->flags); + drm_gem_object_put(&bo->base.base); return ret; } @@ -563,8 +331,8 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla { const struct ivpu_addr_range *range; struct ivpu_addr_range fixed_range; + struct iosys_map map; struct ivpu_bo *bo; - pgprot_t prot; int ret; drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr)); @@ -578,81 +346,42 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla range = &vdev->hw->ranges.global; } - bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0); + bo = ivpu_bo_create(vdev, size, flags); if (IS_ERR(bo)) { ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)", bo, vpu_addr, size, flags); return NULL; } - ret = ivpu_bo_pin(bo); + ret = ivpu_bo_alloc_vpu_addr(bo, &vdev->gctx, range); if (ret) goto err_put; - if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED) - drm_clflush_pages(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT); - - if (bo->flags & DRM_IVPU_BO_WC) - set_pages_array_wc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT); - else if (bo->flags & DRM_IVPU_BO_UNCACHED) - set_pages_array_uc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT); - - prot = ivpu_bo_pgprot(bo, PAGE_KERNEL); - bo->kvaddr = vmap(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT, VM_MAP, prot); - if (!bo->kvaddr) { - ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n"); + ret = ivpu_bo_pin(bo); + if (ret) goto err_put; - } - ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n", - bo->vpu_addr, ivpu_bo_size(bo), flags); + ret = drm_gem_shmem_vmap(&bo->base, &map); + if (ret) + goto err_put; return bo; err_put: - drm_gem_object_put(&bo->base); + drm_gem_object_put(&bo->base.base); return NULL; } void ivpu_bo_free_internal(struct ivpu_bo *bo) { - drm_gem_object_put(&bo->base); -} - -struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf) -{ - struct ivpu_device *vdev = to_ivpu_device(dev); - struct dma_buf_attachment *attach; - struct ivpu_bo *bo; - - attach = dma_buf_attach(buf, dev->dev); - if (IS_ERR(attach)) - return ERR_CAST(attach); - - get_dma_buf(buf); + struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr); - bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0); - if (IS_ERR(bo)) { - ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size); - goto err_detach; - } - - lockdep_set_class(&bo->lock, &prime_bo_lock_class_key); - - bo->base.import_attach = attach; - - return &bo->base; - -err_detach: - dma_buf_detach(buf, attach); - dma_buf_put(buf); - return ERR_CAST(bo); + drm_gem_shmem_vunmap(&bo->base, &map); + drm_gem_object_put(&bo->base.base); } int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct ivpu_file_priv *file_priv = file->driver_priv; - struct ivpu_device *vdev = to_ivpu_device(dev); struct drm_ivpu_bo_info *args = data; struct drm_gem_object *obj; struct ivpu_bo *bo; @@ -665,21 +394,12 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file bo = to_ivpu_bo(obj); mutex_lock(&bo->lock); - - if (!bo->ctx) { - ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL); - if (ret) { - ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret); - goto unlock; - } - } - args->flags = bo->flags; args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node); args->vpu_addr = bo->vpu_addr; args->size = obj->size; -unlock: mutex_unlock(&bo->lock); + drm_gem_object_put(obj); return ret; } @@ -714,41 +434,41 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) { unsigned long dma_refcount = 0; - if (bo->base.dma_buf && bo->base.dma_buf->file) - dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count); + mutex_lock(&bo->lock); + + if (bo->base.base.dma_buf && bo->base.base.dma_buf->file) + dma_refcount = atomic_long_read(&bo->base.base.dma_buf->file->f_count); + + drm_printf(p, "%-3u %-6d 0x%-12llx %-10lu 0x%-8x %-4u %-8lu", + bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.base.size, + bo->flags, kref_read(&bo->base.base.refcount), dma_refcount); + + if (bo->base.base.import_attach) + drm_printf(p, " imported"); + + if (bo->base.pages) + drm_printf(p, " has_pages"); - drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n", - bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), - kref_read(&bo->base.refcount), dma_refcount, bo->ops->name); + if (bo->mmu_mapped) + drm_printf(p, " mmu_mapped"); + + drm_printf(p, "\n"); + + mutex_unlock(&bo->lock); } void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p) { struct ivpu_device *vdev = to_ivpu_device(dev); - struct ivpu_file_priv *file_priv; - unsigned long ctx_id; struct ivpu_bo *bo; - drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n", - "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type"); + drm_printf(p, "%-3s %-6s %-14s %-10s %-10s %-4s %-8s %s\n", + "ctx", "handle", "vpu_addr", "size", "flags", "refs", "dma_refs", "attribs"); - mutex_lock(&vdev->gctx.lock); - list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node) + mutex_lock(&vdev->bo_list_lock); + list_for_each_entry(bo, &vdev->bo_list, bo_list_node) ivpu_bo_print_info(bo, p); - mutex_unlock(&vdev->gctx.lock); - - xa_for_each(&vdev->context_xa, ctx_id, file_priv) { - file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id); - if (!file_priv) - continue; - - mutex_lock(&file_priv->ctx.lock); - list_for_each_entry(bo, &file_priv->ctx.bo_list, ctx_node) - ivpu_bo_print_info(bo, p); - mutex_unlock(&file_priv->ctx.lock); - - ivpu_file_priv_put(&file_priv); - } + mutex_unlock(&vdev->bo_list_lock); } void ivpu_bo_list_print(struct drm_device *dev) diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index a0b4d4a32b3b..d75cad0d3c74 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -6,84 +6,52 @@ #define __IVPU_GEM_H__ #include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> #include <drm/drm_mm.h> -struct dma_buf; -struct ivpu_bo_ops; struct ivpu_file_priv; struct ivpu_bo { - struct drm_gem_object base; - const struct ivpu_bo_ops *ops; - + struct drm_gem_shmem_object base; struct ivpu_mmu_context *ctx; - struct list_head ctx_node; + struct list_head bo_list_node; struct drm_mm_node mm_node; - struct mutex lock; /* Protects: pages, sgt, mmu_mapped */ - struct sg_table *sgt; - struct page **pages; - bool mmu_mapped; - - void *kvaddr; + struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */ u64 vpu_addr; u32 handle; u32 flags; - uintptr_t user_ptr; - u32 job_status; -}; - -enum ivpu_bo_type { - IVPU_BO_TYPE_SHMEM = 1, - IVPU_BO_TYPE_INTERNAL, - IVPU_BO_TYPE_PRIME, -}; - -struct ivpu_bo_ops { - enum ivpu_bo_type type; - const char *name; - int (*alloc_pages)(struct ivpu_bo *bo); - void (*free_pages)(struct ivpu_bo *bo); - int (*map_pages)(struct ivpu_bo *bo); - void (*unmap_pages)(struct ivpu_bo *bo); + u32 job_status; /* Valid only for command buffer */ + bool mmu_mapped; }; int ivpu_bo_pin(struct ivpu_bo *bo); -void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx); -void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p); -void ivpu_bo_list_print(struct drm_device *dev); +void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); -struct ivpu_bo * -ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags); +struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size); +struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags); void ivpu_bo_free_internal(struct ivpu_bo *bo); -struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); -void ivpu_bo_unmap_sgt_and_remove_from_context(struct ivpu_bo *bo); int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p); +void ivpu_bo_list_print(struct drm_device *dev); + static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj) { - return container_of(obj, struct ivpu_bo, base); + return container_of(obj, struct ivpu_bo, base.base); } static inline void *ivpu_bo_vaddr(struct ivpu_bo *bo) { - return bo->kvaddr; + return bo->base.vaddr; } static inline size_t ivpu_bo_size(struct ivpu_bo *bo) { - return bo->base.size; -} - -static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset) -{ - if (offset > ivpu_bo_size(bo) || !bo->pages) - return NULL; - - return bo->pages[offset / PAGE_SIZE]; + return bo->base.base.size; } static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo) @@ -96,20 +64,9 @@ static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo) return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED; } -static inline pgprot_t ivpu_bo_pgprot(struct ivpu_bo *bo, pgprot_t prot) -{ - if (bo->flags & DRM_IVPU_BO_WC) - return pgprot_writecombine(prot); - - if (bo->flags & DRM_IVPU_BO_UNCACHED) - return pgprot_noncached(prot); - - return prot; -} - static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo) { - return to_ivpu_device(bo->base.dev); + return to_ivpu_device(bo->base.base.dev); } static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr) diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h index 1079e06255ba..b2909168a0a6 100644 --- a/drivers/accel/ivpu/ivpu_hw.h +++ b/drivers/accel/ivpu/ivpu_hw.h @@ -15,8 +15,11 @@ struct ivpu_hw_ops { int (*power_down)(struct ivpu_device *vdev); int (*reset)(struct ivpu_device *vdev); bool (*is_idle)(struct ivpu_device *vdev); + int (*wait_for_idle)(struct ivpu_device *vdev); void (*wdt_disable)(struct ivpu_device *vdev); void (*diagnose_failure)(struct ivpu_device *vdev); + u32 (*profiling_freq_get)(struct ivpu_device *vdev); + void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable); u32 (*reg_pll_freq_get)(struct ivpu_device *vdev); u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev); u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev); @@ -58,6 +61,8 @@ struct ivpu_hw_info { u32 sku; u16 config; int dma_bits; + ktime_t d0i3_entry_host_ts; + u64 d0i3_entry_vpu_ts; }; extern const struct ivpu_hw_ops ivpu_hw_37xx_ops; @@ -85,6 +90,11 @@ static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev) return vdev->hw->ops->is_idle(vdev); }; +static inline int ivpu_hw_wait_for_idle(struct ivpu_device *vdev) +{ + return vdev->hw->ops->wait_for_idle(vdev); +}; + static inline int ivpu_hw_power_down(struct ivpu_device *vdev) { ivpu_dbg(vdev, PM, "HW power down\n"); @@ -104,6 +114,16 @@ static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev) vdev->hw->ops->wdt_disable(vdev); }; +static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev) +{ + return vdev->hw->ops->profiling_freq_get(vdev); +}; + +static inline void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable) +{ + return vdev->hw->ops->profiling_freq_drive(vdev, enable); +}; + /* Register indirect accesses */ static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev) { diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c index d530384f8d60..574cdeefb66b 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx.c +++ b/drivers/accel/ivpu/ivpu_hw_37xx.c @@ -29,6 +29,7 @@ #define PLL_REF_CLK_FREQ (50 * 1000000) #define PLL_SIMULATION_FREQ (10 * 1000000) +#define PLL_PROF_CLK_FREQ (38400 * 1000) #define PLL_DEFAULT_EPP_VALUE 0x80 #define TIM_SAFE_ENABLE 0xf1d0dead @@ -37,7 +38,7 @@ #define TIMEOUT_US (150 * USEC_PER_MSEC) #define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) #define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) -#define IDLE_TIMEOUT_US (500 * USEC_PER_MSEC) +#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) #define ICB_0_IRQ_MASK ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \ (REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \ @@ -96,6 +97,7 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) vdev->timeout.tdr = 2000; vdev->timeout.reschedule_suspend = 10; vdev->timeout.autosuspend = 10; + vdev->timeout.d0i3_entry_msg = 5; } static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev) @@ -722,10 +724,23 @@ static bool ivpu_hw_37xx_is_idle(struct ivpu_device *vdev) REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, val); } +static int ivpu_hw_37xx_wait_for_idle(struct ivpu_device *vdev) +{ + return REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); +} + +static void ivpu_hw_37xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev) +{ + vdev->hw->d0i3_entry_host_ts = ktime_get_boottime(); + vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT); +} + static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev) { int ret = 0; + ivpu_hw_37xx_save_d0i3_entry_timestamp(vdev); + if (!ivpu_hw_37xx_is_idle(vdev)) ivpu_warn(vdev, "VPU not idle during power down\n"); @@ -760,6 +775,16 @@ static void ivpu_hw_37xx_wdt_disable(struct ivpu_device *vdev) REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val); } +static u32 ivpu_hw_37xx_profiling_freq_get(struct ivpu_device *vdev) +{ + return PLL_PROF_CLK_FREQ; +} + +static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable) +{ + /* Profiling freq - is a debug feature. Unavailable on VPU 37XX. */ +} + static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config) { u32 pll_clock = PLL_REF_CLK_FREQ * ratio; @@ -871,17 +896,20 @@ static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev) } /* Handler for IRQs from VPU core (irqV) */ -static u32 ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq) +static bool ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread) { u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; + if (!status) + return false; + REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status); if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status)) ivpu_mmu_irq_evtq_handler(vdev); if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) - ivpu_ipc_irq_handler(vdev); + ivpu_ipc_irq_handler(vdev, wake_thread); if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); @@ -898,17 +926,17 @@ static u32 ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq) if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) ivpu_hw_37xx_irq_noc_firewall_handler(vdev); - return status; + return true; } /* Handler for IRQs from Buttress core (irqB) */ -static u32 ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq) +static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq) { u32 status = REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; bool schedule_recovery = false; - if (status == 0) - return 0; + if (!status) + return false; if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status)) ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", @@ -944,23 +972,27 @@ static u32 ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq) if (schedule_recovery) ivpu_pm_schedule_recovery(vdev); - return status; + return true; } static irqreturn_t ivpu_hw_37xx_irq_handler(int irq, void *ptr) { struct ivpu_device *vdev = ptr; - u32 ret_irqv, ret_irqb; + bool irqv_handled, irqb_handled, wake_thread = false; REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); - ret_irqv = ivpu_hw_37xx_irqv_handler(vdev, irq); - ret_irqb = ivpu_hw_37xx_irqb_handler(vdev, irq); + irqv_handled = ivpu_hw_37xx_irqv_handler(vdev, irq, &wake_thread); + irqb_handled = ivpu_hw_37xx_irqb_handler(vdev, irq); /* Re-enable global interrupts to re-trigger MSI for pending interrupts */ REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); - return IRQ_RETVAL(ret_irqb | ret_irqv); + if (wake_thread) + return IRQ_WAKE_THREAD; + if (irqv_handled || irqb_handled) + return IRQ_HANDLED; + return IRQ_NONE; } static void ivpu_hw_37xx_diagnose_failure(struct ivpu_device *vdev) @@ -997,11 +1029,14 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = { .info_init = ivpu_hw_37xx_info_init, .power_up = ivpu_hw_37xx_power_up, .is_idle = ivpu_hw_37xx_is_idle, + .wait_for_idle = ivpu_hw_37xx_wait_for_idle, .power_down = ivpu_hw_37xx_power_down, .reset = ivpu_hw_37xx_reset, .boot_fw = ivpu_hw_37xx_boot_fw, .wdt_disable = ivpu_hw_37xx_wdt_disable, .diagnose_failure = ivpu_hw_37xx_diagnose_failure, + .profiling_freq_get = ivpu_hw_37xx_profiling_freq_get, + .profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive, .reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get, .reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get, .reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get, diff --git a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h index 4083beb5e9db..f6fec1919202 100644 --- a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h +++ b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h @@ -240,6 +240,8 @@ #define VPU_37XX_CPU_SS_TIM_GEN_CONFIG 0x06021008u #define VPU_37XX_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK BIT_MASK(9) +#define VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT 0x06029000u + #define VPU_37XX_CPU_SS_DOORBELL_0 0x06300000u #define VPU_37XX_CPU_SS_DOORBELL_0_SET_MASK BIT_MASK(0) diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c index e691c49c9841..eba2fdef2ace 100644 --- a/drivers/accel/ivpu/ivpu_hw_40xx.c +++ b/drivers/accel/ivpu/ivpu_hw_40xx.c @@ -39,6 +39,7 @@ #define TIMEOUT_US (150 * USEC_PER_MSEC) #define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC) #define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC) +#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC) #define WEIGHTS_DEFAULT 0xf711f711u #define WEIGHTS_ATS_DEFAULT 0x0000f711u @@ -139,18 +140,21 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev) vdev->timeout.tdr = 2000000; vdev->timeout.reschedule_suspend = 1000; vdev->timeout.autosuspend = -1; + vdev->timeout.d0i3_entry_msg = 500; } else if (ivpu_is_simics(vdev)) { vdev->timeout.boot = 50; vdev->timeout.jsm = 500; vdev->timeout.tdr = 10000; vdev->timeout.reschedule_suspend = 10; vdev->timeout.autosuspend = -1; + vdev->timeout.d0i3_entry_msg = 100; } else { vdev->timeout.boot = 1000; vdev->timeout.jsm = 500; vdev->timeout.tdr = 2000; vdev->timeout.reschedule_suspend = 10; vdev->timeout.autosuspend = 10; + vdev->timeout.d0i3_entry_msg = 5; } } @@ -824,12 +828,6 @@ static int ivpu_hw_40xx_power_up(struct ivpu_device *vdev) { int ret; - ret = ivpu_hw_40xx_reset(vdev); - if (ret) { - ivpu_err(vdev, "Failed to reset HW: %d\n", ret); - return ret; - } - ret = ivpu_hw_40xx_d0i3_disable(vdev); if (ret) ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret); @@ -898,10 +896,23 @@ static bool ivpu_hw_40xx_is_idle(struct ivpu_device *vdev) REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, val); } +static int ivpu_hw_40xx_wait_for_idle(struct ivpu_device *vdev) +{ + return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US); +} + +static void ivpu_hw_40xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev) +{ + vdev->hw->d0i3_entry_host_ts = ktime_get_boottime(); + vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT); +} + static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev) { int ret = 0; + ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev); + if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_reset(vdev)) ivpu_warn(vdev, "Failed to reset the VPU\n"); @@ -933,6 +944,19 @@ static void ivpu_hw_40xx_wdt_disable(struct ivpu_device *vdev) REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val); } +static u32 ivpu_hw_40xx_profiling_freq_get(struct ivpu_device *vdev) +{ + return vdev->hw->pll.profiling_freq; +} + +static void ivpu_hw_40xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable) +{ + if (enable) + vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_HIGH; + else + vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT; +} + /* Register indirect accesses */ static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev) { @@ -1023,13 +1047,12 @@ static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev) } /* Handler for IRQs from VPU core (irqV) */ -static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq) +static bool ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread) { u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK; - irqreturn_t ret = IRQ_NONE; if (!status) - return IRQ_NONE; + return false; REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status); @@ -1037,7 +1060,7 @@ static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq) ivpu_mmu_irq_evtq_handler(vdev); if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status)) - ret |= ivpu_ipc_irq_handler(vdev); + ivpu_ipc_irq_handler(vdev, wake_thread); if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status)) ivpu_dbg(vdev, IRQ, "MMU sync complete\n"); @@ -1054,17 +1077,17 @@ static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq) if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status)) ivpu_hw_40xx_irq_noc_firewall_handler(vdev); - return ret; + return true; } /* Handler for IRQs from Buttress core (irqB) */ -static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq) +static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq) { bool schedule_recovery = false; u32 status = REGB_RD32(VPU_40XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK; - if (status == 0) - return IRQ_NONE; + if (!status) + return false; if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status)) ivpu_dbg(vdev, IRQ, "FREQ_CHANGE"); @@ -1116,26 +1139,27 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq) if (schedule_recovery) ivpu_pm_schedule_recovery(vdev); - return IRQ_HANDLED; + return true; } static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr) { + bool irqv_handled, irqb_handled, wake_thread = false; struct ivpu_device *vdev = ptr; - irqreturn_t ret = IRQ_NONE; REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1); - ret |= ivpu_hw_40xx_irqv_handler(vdev, irq); - ret |= ivpu_hw_40xx_irqb_handler(vdev, irq); + irqv_handled = ivpu_hw_40xx_irqv_handler(vdev, irq, &wake_thread); + irqb_handled = ivpu_hw_40xx_irqb_handler(vdev, irq); /* Re-enable global interrupts to re-trigger MSI for pending interrupts */ REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0); - if (ret & IRQ_WAKE_THREAD) + if (wake_thread) return IRQ_WAKE_THREAD; - - return ret; + if (irqv_handled || irqb_handled) + return IRQ_HANDLED; + return IRQ_NONE; } static void ivpu_hw_40xx_diagnose_failure(struct ivpu_device *vdev) @@ -1185,11 +1209,14 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = { .info_init = ivpu_hw_40xx_info_init, .power_up = ivpu_hw_40xx_power_up, .is_idle = ivpu_hw_40xx_is_idle, + .wait_for_idle = ivpu_hw_40xx_wait_for_idle, .power_down = ivpu_hw_40xx_power_down, .reset = ivpu_hw_40xx_reset, .boot_fw = ivpu_hw_40xx_boot_fw, .wdt_disable = ivpu_hw_40xx_wdt_disable, .diagnose_failure = ivpu_hw_40xx_diagnose_failure, + .profiling_freq_get = ivpu_hw_40xx_profiling_freq_get, + .profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive, .reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get, .reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get, .reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get, diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c index a4ca40b184d4..e86621f16f85 100644 --- a/drivers/accel/ivpu/ivpu_ipc.c +++ b/drivers/accel/ivpu/ivpu_ipc.c @@ -5,7 +5,7 @@ #include <linux/genalloc.h> #include <linux/highmem.h> -#include <linux/kthread.h> +#include <linux/pm_runtime.h> #include <linux/wait.h> #include "ivpu_drv.h" @@ -17,19 +17,12 @@ #include "ivpu_pm.h" #define IPC_MAX_RX_MSG 128 -#define IS_KTHREAD() (get_current()->flags & PF_KTHREAD) struct ivpu_ipc_tx_buf { struct ivpu_ipc_hdr ipc; struct vpu_jsm_msg jsm; }; -struct ivpu_ipc_rx_msg { - struct list_head link; - struct ivpu_ipc_hdr *ipc_hdr; - struct vpu_jsm_msg *jsm_msg; -}; - static void ivpu_ipc_msg_dump(struct ivpu_device *vdev, char *c, struct ivpu_ipc_hdr *ipc_hdr, u32 vpu_addr) { @@ -139,8 +132,49 @@ static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr) ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr); } -void -ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel) +static void +ivpu_ipc_rx_msg_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_rx_msg *rx_msg; + + lockdep_assert_held(&ipc->cons_lock); + lockdep_assert_irqs_disabled(); + + rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC); + if (!rx_msg) { + ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); + return; + } + + atomic_inc(&ipc->rx_msg_count); + + rx_msg->ipc_hdr = ipc_hdr; + rx_msg->jsm_msg = jsm_msg; + rx_msg->callback = cons->rx_callback; + + if (rx_msg->callback) { + list_add_tail(&rx_msg->link, &ipc->cb_msg_list); + } else { + spin_lock(&cons->rx_lock); + list_add_tail(&rx_msg->link, &cons->rx_msg_list); + spin_unlock(&cons->rx_lock); + wake_up(&cons->rx_msg_wq); + } +} + +static void +ivpu_ipc_rx_msg_del(struct ivpu_device *vdev, struct ivpu_ipc_rx_msg *rx_msg) +{ + list_del(&rx_msg->link); + ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); + atomic_dec(&vdev->ipc->rx_msg_count); + kfree(rx_msg); +} + +void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, + u32 channel, ivpu_ipc_rx_callback_t rx_callback) { struct ivpu_ipc_info *ipc = vdev->ipc; @@ -148,13 +182,15 @@ ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, cons->channel = channel; cons->tx_vpu_addr = 0; cons->request_id = 0; - spin_lock_init(&cons->rx_msg_lock); + cons->aborted = false; + cons->rx_callback = rx_callback; + spin_lock_init(&cons->rx_lock); INIT_LIST_HEAD(&cons->rx_msg_list); init_waitqueue_head(&cons->rx_msg_wq); - spin_lock_irq(&ipc->cons_list_lock); + spin_lock_irq(&ipc->cons_lock); list_add_tail(&cons->link, &ipc->cons_list); - spin_unlock_irq(&ipc->cons_list_lock); + spin_unlock_irq(&ipc->cons_lock); } void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons) @@ -162,18 +198,14 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_rx_msg *rx_msg, *r; - spin_lock_irq(&ipc->cons_list_lock); + spin_lock_irq(&ipc->cons_lock); list_del(&cons->link); - spin_unlock_irq(&ipc->cons_list_lock); - - spin_lock_irq(&cons->rx_msg_lock); - list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) { - list_del(&rx_msg->link); - ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); - atomic_dec(&ipc->rx_msg_count); - kfree(rx_msg); - } - spin_unlock_irq(&cons->rx_msg_lock); + spin_unlock_irq(&ipc->cons_lock); + + spin_lock_irq(&cons->rx_lock); + list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) + ivpu_ipc_rx_msg_del(vdev, rx_msg); + spin_unlock_irq(&cons->rx_lock); ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr); } @@ -202,52 +234,61 @@ unlock: return ret; } +static bool ivpu_ipc_rx_need_wakeup(struct ivpu_ipc_consumer *cons) +{ + bool ret; + + spin_lock_irq(&cons->rx_lock); + ret = !list_empty(&cons->rx_msg_list) || cons->aborted; + spin_unlock_irq(&cons->rx_lock); + + return ret; +} + int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, struct ivpu_ipc_hdr *ipc_buf, - struct vpu_jsm_msg *ipc_payload, unsigned long timeout_ms) + struct vpu_jsm_msg *jsm_msg, unsigned long timeout_ms) { - struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_rx_msg *rx_msg; int wait_ret, ret = 0; + if (drm_WARN_ONCE(&vdev->drm, cons->rx_callback, "Consumer works only in async mode\n")) + return -EINVAL; + wait_ret = wait_event_timeout(cons->rx_msg_wq, - (IS_KTHREAD() && kthread_should_stop()) || - !list_empty(&cons->rx_msg_list), + ivpu_ipc_rx_need_wakeup(cons), msecs_to_jiffies(timeout_ms)); - if (IS_KTHREAD() && kthread_should_stop()) - return -EINTR; - if (wait_ret == 0) return -ETIMEDOUT; - spin_lock_irq(&cons->rx_msg_lock); + spin_lock_irq(&cons->rx_lock); + if (cons->aborted) { + spin_unlock_irq(&cons->rx_lock); + return -ECANCELED; + } rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link); if (!rx_msg) { - spin_unlock_irq(&cons->rx_msg_lock); + spin_unlock_irq(&cons->rx_lock); return -EAGAIN; } - list_del(&rx_msg->link); - spin_unlock_irq(&cons->rx_msg_lock); if (ipc_buf) memcpy(ipc_buf, rx_msg->ipc_hdr, sizeof(*ipc_buf)); if (rx_msg->jsm_msg) { - u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*ipc_payload)); + u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg)); if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) { ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result); ret = -EBADMSG; } - if (ipc_payload) - memcpy(ipc_payload, rx_msg->jsm_msg, size); + if (jsm_msg) + memcpy(jsm_msg, rx_msg->jsm_msg, size); } - ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); - atomic_dec(&ipc->rx_msg_count); - kfree(rx_msg); - + ivpu_ipc_rx_msg_del(vdev, rx_msg); + spin_unlock_irq(&cons->rx_lock); return ret; } @@ -260,7 +301,7 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req struct ivpu_ipc_consumer cons; int ret; - ivpu_ipc_consumer_add(vdev, &cons, channel); + ivpu_ipc_consumer_add(vdev, &cons, channel, NULL); ret = ivpu_ipc_send(vdev, &cons, req); if (ret) { @@ -285,23 +326,19 @@ consumer_del: return ret; } -int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp_type, - struct vpu_jsm_msg *resp, u32 channel, - unsigned long timeout_ms) +int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, + u32 channel, unsigned long timeout_ms) { struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB }; struct vpu_jsm_msg hb_resp; int ret, hb_ret; - ret = ivpu_rpm_get(vdev); - if (ret < 0) - return ret; + drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev)); - ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp_type, resp, - channel, timeout_ms); + ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms); if (ret != -ETIMEDOUT) - goto rpm_put; + return ret; hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE, &hb_resp, VPU_IPC_CHAN_ASYNC_CMD, @@ -311,7 +348,21 @@ int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, ivpu_pm_schedule_recovery(vdev); } -rpm_put: + return ret; +} + +int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, + u32 channel, unsigned long timeout_ms) +{ + int ret; + + ret = ivpu_rpm_get(vdev); + if (ret < 0) + return ret; + + ret = ivpu_ipc_send_receive_active(vdev, req, expected_resp, resp, channel, timeout_ms); + ivpu_rpm_put(vdev); return ret; } @@ -329,35 +380,7 @@ ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons return false; } -static void -ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, - struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg) -{ - struct ivpu_ipc_info *ipc = vdev->ipc; - struct ivpu_ipc_rx_msg *rx_msg; - unsigned long flags; - - lockdep_assert_held(&ipc->cons_list_lock); - - rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC); - if (!rx_msg) { - ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg); - return; - } - - atomic_inc(&ipc->rx_msg_count); - - rx_msg->ipc_hdr = ipc_hdr; - rx_msg->jsm_msg = jsm_msg; - - spin_lock_irqsave(&cons->rx_msg_lock, flags); - list_add_tail(&rx_msg->link, &cons->rx_msg_list); - spin_unlock_irqrestore(&cons->rx_msg_lock, flags); - - wake_up(&cons->rx_msg_wq); -} - -int ivpu_ipc_irq_handler(struct ivpu_device *vdev) +void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread) { struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_consumer *cons; @@ -375,7 +398,7 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev) vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev); if (vpu_addr == REG_IO_ERROR) { ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n"); - return -EIO; + return; } ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr); @@ -405,15 +428,15 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev) } dispatched = false; - spin_lock_irqsave(&ipc->cons_list_lock, flags); + spin_lock_irqsave(&ipc->cons_lock, flags); list_for_each_entry(cons, &ipc->cons_list, link) { if (ivpu_ipc_match_consumer(vdev, cons, ipc_hdr, jsm_msg)) { - ivpu_ipc_dispatch(vdev, cons, ipc_hdr, jsm_msg); + ivpu_ipc_rx_msg_add(vdev, cons, ipc_hdr, jsm_msg); dispatched = true; break; } } - spin_unlock_irqrestore(&ipc->cons_list_lock, flags); + spin_unlock_irqrestore(&ipc->cons_lock, flags); if (!dispatched) { ivpu_dbg(vdev, IPC, "IPC RX msg 0x%x dropped (no consumer)\n", vpu_addr); @@ -421,7 +444,28 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev) } } - return 0; + if (wake_thread) + *wake_thread = !list_empty(&ipc->cb_msg_list); +} + +irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev) +{ + struct ivpu_ipc_info *ipc = vdev->ipc; + struct ivpu_ipc_rx_msg *rx_msg, *r; + struct list_head cb_msg_list; + + INIT_LIST_HEAD(&cb_msg_list); + + spin_lock_irq(&ipc->cons_lock); + list_splice_tail_init(&ipc->cb_msg_list, &cb_msg_list); + spin_unlock_irq(&ipc->cons_lock); + + list_for_each_entry_safe(rx_msg, r, &cb_msg_list, link) { + rx_msg->callback(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg); + ivpu_ipc_rx_msg_del(vdev, rx_msg); + } + + return IRQ_HANDLED; } int ivpu_ipc_init(struct ivpu_device *vdev) @@ -456,10 +500,10 @@ int ivpu_ipc_init(struct ivpu_device *vdev) goto err_free_rx; } + spin_lock_init(&ipc->cons_lock); INIT_LIST_HEAD(&ipc->cons_list); - spin_lock_init(&ipc->cons_list_lock); + INIT_LIST_HEAD(&ipc->cb_msg_list); drmm_mutex_init(&vdev->drm, &ipc->lock); - ivpu_ipc_reset(vdev); return 0; @@ -472,6 +516,13 @@ err_free_tx: void ivpu_ipc_fini(struct ivpu_device *vdev) { + struct ivpu_ipc_info *ipc = vdev->ipc; + + drm_WARN_ON(&vdev->drm, ipc->on); + drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list)); + drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list)); + drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0); + ivpu_ipc_mem_fini(vdev); } @@ -488,16 +539,27 @@ void ivpu_ipc_disable(struct ivpu_device *vdev) { struct ivpu_ipc_info *ipc = vdev->ipc; struct ivpu_ipc_consumer *cons, *c; - unsigned long flags; + struct ivpu_ipc_rx_msg *rx_msg, *r; + + drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list)); mutex_lock(&ipc->lock); ipc->on = false; mutex_unlock(&ipc->lock); - spin_lock_irqsave(&ipc->cons_list_lock, flags); - list_for_each_entry_safe(cons, c, &ipc->cons_list, link) + spin_lock_irq(&ipc->cons_lock); + list_for_each_entry_safe(cons, c, &ipc->cons_list, link) { + spin_lock(&cons->rx_lock); + if (!cons->rx_callback) + cons->aborted = true; + list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) + ivpu_ipc_rx_msg_del(vdev, rx_msg); + spin_unlock(&cons->rx_lock); wake_up(&cons->rx_msg_wq); - spin_unlock_irqrestore(&ipc->cons_list_lock, flags); + } + spin_unlock_irq(&ipc->cons_lock); + + drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0); } void ivpu_ipc_reset(struct ivpu_device *vdev) @@ -505,6 +567,7 @@ void ivpu_ipc_reset(struct ivpu_device *vdev) struct ivpu_ipc_info *ipc = vdev->ipc; mutex_lock(&ipc->lock); + drm_WARN_ON(&vdev->drm, ipc->on); memset(ivpu_bo_vaddr(ipc->mem_tx), 0, ivpu_bo_size(ipc->mem_tx)); memset(ivpu_bo_vaddr(ipc->mem_rx), 0, ivpu_bo_size(ipc->mem_rx)); diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h index 68f5b6668e00..40ca3cc4e61f 100644 --- a/drivers/accel/ivpu/ivpu_ipc.h +++ b/drivers/accel/ivpu/ivpu_ipc.h @@ -42,13 +42,26 @@ struct ivpu_ipc_hdr { u8 status; } __packed __aligned(IVPU_IPC_ALIGNMENT); +typedef void (*ivpu_ipc_rx_callback_t)(struct ivpu_device *vdev, + struct ivpu_ipc_hdr *ipc_hdr, + struct vpu_jsm_msg *jsm_msg); + +struct ivpu_ipc_rx_msg { + struct list_head link; + struct ivpu_ipc_hdr *ipc_hdr; + struct vpu_jsm_msg *jsm_msg; + ivpu_ipc_rx_callback_t callback; +}; + struct ivpu_ipc_consumer { struct list_head link; u32 channel; u32 tx_vpu_addr; u32 request_id; + bool aborted; + ivpu_ipc_rx_callback_t rx_callback; - spinlock_t rx_msg_lock; /* Protects rx_msg_list */ + spinlock_t rx_lock; /* Protects rx_msg_list and aborted */ struct list_head rx_msg_list; wait_queue_head_t rx_msg_wq; }; @@ -60,8 +73,9 @@ struct ivpu_ipc_info { atomic_t rx_msg_count; - spinlock_t cons_list_lock; /* Protects cons_list */ + spinlock_t cons_lock; /* Protects cons_list and cb_msg_list */ struct list_head cons_list; + struct list_head cb_msg_list; atomic_t request_id; struct mutex lock; /* Lock on status */ @@ -75,19 +89,22 @@ void ivpu_ipc_enable(struct ivpu_device *vdev); void ivpu_ipc_disable(struct ivpu_device *vdev); void ivpu_ipc_reset(struct ivpu_device *vdev); -int ivpu_ipc_irq_handler(struct ivpu_device *vdev); +void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread); +irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev); void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, - u32 channel); + u32 channel, ivpu_ipc_rx_callback_t callback); void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons); int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, - struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *ipc_payload, + struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *jsm_msg, unsigned long timeout_ms); +int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req, + enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, + u32 channel, unsigned long timeout_ms); int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req, - enum vpu_ipc_msg_type expected_resp_type, - struct vpu_jsm_msg *resp, u32 channel, - unsigned long timeout_ms); + enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp, + u32 channel, unsigned long timeout_ms); #endif /* __IVPU_IPC_H__ */ diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c index 8983e3a4fdf9..7206cf9cdb4a 100644 --- a/drivers/accel/ivpu/ivpu_job.c +++ b/drivers/accel/ivpu/ivpu_job.c @@ -7,7 +7,6 @@ #include <linux/bitfield.h> #include <linux/highmem.h> -#include <linux/kthread.h> #include <linux/pci.h> #include <linux/module.h> #include <uapi/drm/ivpu_accel.h> @@ -24,10 +23,6 @@ #define JOB_ID_CONTEXT_MASK GENMASK(31, 8) #define JOB_MAX_BUFFER_COUNT 65535 -static unsigned int ivpu_tdr_timeout_ms; -module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644); -MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); - static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq) { ivpu_hw_reg_db_set(vdev, cmdq->db_id); @@ -196,6 +191,8 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) entry->batch_buf_addr = job->cmd_buf_vpu_addr; entry->job_id = job->job_id; entry->flags = 0; + if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION)) + entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK; wmb(); /* Ensure that tail is updated after filling entry */ header->tail = next_entry; wmb(); /* Flush WC buffer for jobq header */ @@ -264,7 +261,7 @@ static void job_release(struct kref *ref) for (i = 0; i < job->bo_count; i++) if (job->bos[i]) - drm_gem_object_put(&job->bos[i]->base); + drm_gem_object_put(&job->bos[i]->base.base); dma_fence_put(job->done_fence); ivpu_file_priv_put(&job->file_priv); @@ -340,23 +337,12 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status) ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); + ivpu_stop_job_timeout_detection(vdev); + job_put(job); return 0; } -static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg) -{ - struct vpu_ipc_msg_payload_job_done *payload; - struct vpu_jsm_msg *job_ret_msg = msg; - int ret; - - payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload; - - ret = ivpu_job_done(vdev, payload->job_id, payload->job_status); - if (ret) - ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret); -} - void ivpu_jobs_abort_all(struct ivpu_device *vdev) { struct ivpu_job *job; @@ -398,11 +384,13 @@ static int ivpu_direct_job_submission(struct ivpu_job *job) if (ret) goto err_xa_erase; + ivpu_start_job_timeout_detection(vdev); + ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n", job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id, job->engine_idx, cmdq->jobq->header.tail); - if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) { + if (ivpu_test_mode & IVPU_TEST_MODE_NULL_HW) { ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); cmdq->jobq->header.head = cmdq->jobq->header.tail; wmb(); /* Flush WC buffer for jobq header */ @@ -448,7 +436,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 } bo = job->bos[CMD_BUF_IDX]; - if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) { + if (!dma_resv_test_signaled(bo->base.base.resv, DMA_RESV_USAGE_READ)) { ivpu_warn(vdev, "Buffer is already in use\n"); return -EBUSY; } @@ -468,7 +456,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 } for (i = 0; i < buf_count; i++) { - ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1); + ret = dma_resv_reserve_fences(job->bos[i]->base.base.resv, 1); if (ret) { ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); goto unlock_reservations; @@ -477,7 +465,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 for (i = 0; i < buf_count; i++) { usage = (i == CMD_BUF_IDX) ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_BOOKKEEP; - dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, usage); + dma_resv_add_fence(job->bos[i]->base.base.resv, job->done_fence, usage); } unlock_reservations: @@ -562,61 +550,36 @@ free_handles: return ret; } -static int ivpu_job_done_thread(void *arg) +static void +ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr, + struct vpu_jsm_msg *jsm_msg) { - struct ivpu_device *vdev = (struct ivpu_device *)arg; - struct ivpu_ipc_consumer cons; - struct vpu_jsm_msg jsm_msg; - bool jobs_submitted; - unsigned int timeout; + struct vpu_ipc_msg_payload_job_done *payload; int ret; - ivpu_dbg(vdev, JOB, "Started %s\n", __func__); - - ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET); - - while (!kthread_should_stop()) { - timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; - jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa); - ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout); - if (!ret) { - ivpu_job_done_message(vdev, &jsm_msg); - } else if (ret == -ETIMEDOUT) { - if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) { - ivpu_err(vdev, "TDR detected, timeout %d ms", timeout); - ivpu_hw_diagnose_failure(vdev); - ivpu_pm_schedule_recovery(vdev); - } - } + if (!jsm_msg) { + ivpu_err(vdev, "IPC message has no JSM payload\n"); + return; } - ivpu_ipc_consumer_del(vdev, &cons); - - ivpu_jobs_abort_all(vdev); + if (jsm_msg->result != VPU_JSM_STATUS_SUCCESS) { + ivpu_err(vdev, "Invalid JSM message result: %d\n", jsm_msg->result); + return; + } - ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__); - return 0; + payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload; + ret = ivpu_job_done(vdev, payload->job_id, payload->job_status); + if (!ret && !xa_empty(&vdev->submitted_jobs_xa)) + ivpu_start_job_timeout_detection(vdev); } -int ivpu_job_done_thread_init(struct ivpu_device *vdev) +void ivpu_job_done_consumer_init(struct ivpu_device *vdev) { - struct task_struct *thread; - - thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread"); - if (IS_ERR(thread)) { - ivpu_err(vdev, "Failed to start job completion thread\n"); - return -EIO; - } - - get_task_struct(thread); - wake_up_process(thread); - - vdev->job_done_thread = thread; - - return 0; + ivpu_ipc_consumer_add(vdev, &vdev->job_done_consumer, + VPU_IPC_CHAN_JOB_RET, ivpu_job_done_callback); } -void ivpu_job_done_thread_fini(struct ivpu_device *vdev) +void ivpu_job_done_consumer_fini(struct ivpu_device *vdev) { - kthread_stop_put(vdev->job_done_thread); + ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer); } diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h index 5514c2d8a609..45a2f2ec82e5 100644 --- a/drivers/accel/ivpu/ivpu_job.h +++ b/drivers/accel/ivpu/ivpu_job.h @@ -59,8 +59,8 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv); void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev); -int ivpu_job_done_thread_init(struct ivpu_device *vdev); -void ivpu_job_done_thread_fini(struct ivpu_device *vdev); +void ivpu_job_done_consumer_init(struct ivpu_device *vdev); +void ivpu_job_done_consumer_fini(struct ivpu_device *vdev); void ivpu_jobs_abort_all(struct ivpu_device *vdev); diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c index 0c2fe7142024..8cea0dd731b9 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.c +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -4,6 +4,7 @@ */ #include "ivpu_drv.h" +#include "ivpu_hw.h" #include "ivpu_ipc.h" #include "ivpu_jsm_msg.h" @@ -36,6 +37,17 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type) IVPU_CASE_TO_STR(VPU_JSM_MSG_DESTROY_CMD_QUEUE); IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES); IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_REGISTER_DB); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_ENGINE_RESUME); + IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP); IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT); IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL); IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE); @@ -65,6 +77,12 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type) IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP); IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DONE); IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL_RSP); + IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER); + IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE_DONE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE); + IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE_DONE); } #undef IVPU_CASE_TO_STR @@ -243,3 +261,23 @@ int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid) return ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp, VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); } + +int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_PWR_D0I3_ENTER }; + struct vpu_jsm_msg resp; + int ret; + + if (IVPU_WA(disable_d0i3_msg)) + return 0; + + req.payload.pwr_d0i3_enter.send_response = 1; + + ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE, + &resp, VPU_IPC_CHAN_GEN_CMD, + vdev->timeout.d0i3_entry_msg); + if (ret) + return ret; + + return ivpu_hw_wait_for_idle(vdev); +} diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h index 66979a948c7c..ae75e5dbcc41 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.h +++ b/drivers/accel/ivpu/ivpu_jsm_msg.h @@ -22,4 +22,5 @@ int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destinati int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask, u64 trace_hw_component_mask); int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid); +int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev); #endif diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c index 2538c78fbebe..2228c44b115f 100644 --- a/drivers/accel/ivpu/ivpu_mmu.c +++ b/drivers/accel/ivpu/ivpu_mmu.c @@ -230,7 +230,12 @@ (REG_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT)) | \ (REG_FLD(IVPU_MMU_REG_GERROR, MSI_ABT))) -static char *ivpu_mmu_event_to_str(u32 cmd) +#define IVPU_MMU_CERROR_NONE 0x0 +#define IVPU_MMU_CERROR_ILL 0x1 +#define IVPU_MMU_CERROR_ABT 0x2 +#define IVPU_MMU_CERROR_ATC_INV_SYNC 0x3 + +static const char *ivpu_mmu_event_to_str(u32 cmd) { switch (cmd) { case IVPU_MMU_EVT_F_UUT: @@ -276,6 +281,22 @@ static char *ivpu_mmu_event_to_str(u32 cmd) } } +static const char *ivpu_mmu_cmdq_err_to_str(u32 err) +{ + switch (err) { + case IVPU_MMU_CERROR_NONE: + return "No CMDQ Error"; + case IVPU_MMU_CERROR_ILL: + return "Illegal command"; + case IVPU_MMU_CERROR_ABT: + return "External abort on CMDQ read"; + case IVPU_MMU_CERROR_ATC_INV_SYNC: + return "Sync failed to complete ATS invalidation"; + default: + return "Unknown CMDQ Error"; + } +} + static void ivpu_mmu_config_check(struct ivpu_device *vdev) { u32 val_ref; @@ -479,10 +500,7 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev) u64 val; int ret; - val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC) | - FIELD_PREP(IVPU_MMU_CMD_SYNC_0_CS, 0x2) | - FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSH, 0x3) | - FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSI_ATTR, 0xf); + val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC); ret = ivpu_mmu_cmdq_cmd_write(vdev, "SYNC", val, 0); if (ret) @@ -492,8 +510,15 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev) REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, q->prod); ret = ivpu_mmu_cmdq_wait_for_cons(vdev); - if (ret) - ivpu_err(vdev, "Timed out waiting for consumer: %d\n", ret); + if (ret) { + u32 err; + + val = REGV_RD32(IVPU_MMU_REG_CMDQ_CONS); + err = REG_GET_FLD(IVPU_MMU_REG_CMDQ_CONS, ERR, val); + + ivpu_err(vdev, "Timed out waiting for MMU consumer: %d, error: %s\n", ret, + ivpu_mmu_cmdq_err_to_str(err)); + } return ret; } @@ -750,9 +775,12 @@ int ivpu_mmu_init(struct ivpu_device *vdev) ivpu_dbg(vdev, MMU, "Init..\n"); - drmm_mutex_init(&vdev->drm, &mmu->lock); ivpu_mmu_config_check(vdev); + ret = drmm_mutex_init(&vdev->drm, &mmu->lock); + if (ret) + return ret; + ret = ivpu_mmu_structs_alloc(vdev); if (ret) return ret; diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c index c1050a2df954..12a8c09d4547 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.c +++ b/drivers/accel/ivpu/ivpu_mmu_context.c @@ -5,6 +5,9 @@ #include <linux/bitfield.h> #include <linux/highmem.h> +#include <linux/set_memory.h> + +#include <drm/drm_cache.h> #include "ivpu_drv.h" #include "ivpu_hw.h" @@ -39,12 +42,57 @@ #define IVPU_MMU_ENTRY_MAPPED (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \ IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID) +static void *ivpu_pgtable_alloc_page(struct ivpu_device *vdev, dma_addr_t *dma) +{ + dma_addr_t dma_addr; + struct page *page; + void *cpu; + + page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); + if (!page) + return NULL; + + set_pages_array_wc(&page, 1); + + dma_addr = dma_map_page(vdev->drm.dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(vdev->drm.dev, dma_addr)) + goto err_free_page; + + cpu = vmap(&page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL)); + if (!cpu) + goto err_dma_unmap_page; + + + *dma = dma_addr; + return cpu; + +err_dma_unmap_page: + dma_unmap_page(vdev->drm.dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + +err_free_page: + put_page(page); + return NULL; +} + +static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr) +{ + struct page *page; + + if (cpu_addr) { + page = vmalloc_to_page(cpu_addr); + vunmap(cpu_addr); + dma_unmap_page(vdev->drm.dev, dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK, PAGE_SIZE, + DMA_BIDIRECTIONAL); + set_pages_array_wb(&page, 1); + put_page(page); + } +} + static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) { dma_addr_t pgd_dma; - pgtable->pgd_dma_ptr = dma_alloc_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma, - GFP_KERNEL); + pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma); if (!pgtable->pgd_dma_ptr) return -ENOMEM; @@ -53,13 +101,6 @@ static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtab return 0; } -static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr) -{ - if (cpu_addr) - dma_free_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, cpu_addr, - dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK); -} - static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable) { int pgd_idx, pud_idx, pmd_idx; @@ -84,19 +125,19 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgt pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx]; pte_dma = pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx]; - ivpu_mmu_pgtable_free(vdev, pte_dma_ptr, pte_dma); + ivpu_pgtable_free_page(vdev, pte_dma_ptr, pte_dma); } kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]); - ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma); + ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma); } kfree(pgtable->pmd_ptrs[pgd_idx]); kfree(pgtable->pte_ptrs[pgd_idx]); - ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma); + ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma); } - ivpu_mmu_pgtable_free(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma); + ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma); } static u64* @@ -108,7 +149,7 @@ ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, if (pud_dma_ptr) return pud_dma_ptr; - pud_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pud_dma, GFP_KERNEL); + pud_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pud_dma); if (!pud_dma_ptr) return NULL; @@ -131,7 +172,7 @@ err_free_pmd_ptrs: kfree(pgtable->pmd_ptrs[pgd_idx]); err_free_pud_dma_ptr: - ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma); + ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma); return NULL; } @@ -145,7 +186,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, if (pmd_dma_ptr) return pmd_dma_ptr; - pmd_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL); + pmd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pmd_dma); if (!pmd_dma_ptr) return NULL; @@ -160,7 +201,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, return pmd_dma_ptr; err_free_pmd_dma_ptr: - ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma); + ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma); return NULL; } @@ -174,7 +215,7 @@ ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable, if (pte_dma_ptr) return pte_dma_ptr; - pte_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL); + pte_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pte_dma); if (!pte_dma_ptr) return NULL; @@ -249,38 +290,6 @@ static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_ad ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = IVPU_MMU_ENTRY_INVALID; } -static void -ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size) -{ - struct ivpu_mmu_pgtable *pgtable = &ctx->pgtable; - u64 end_addr = vpu_addr + size; - - /* Align to PMD entry (2 MB) */ - vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1); - - while (vpu_addr < end_addr) { - int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr); - u64 pud_end = (pgd_idx + 1) * (u64)IVPU_MMU_PUD_MAP_SIZE; - - while (vpu_addr < end_addr && vpu_addr < pud_end) { - int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr); - u64 pmd_end = (pud_idx + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE; - - while (vpu_addr < end_addr && vpu_addr < pmd_end) { - int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr); - - clflush_cache_range(pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx], - IVPU_MMU_PGTABLE_SIZE); - vpu_addr += IVPU_MMU_PTE_MAP_SIZE; - } - clflush_cache_range(pgtable->pmd_ptrs[pgd_idx][pud_idx], - IVPU_MMU_PGTABLE_SIZE); - } - clflush_cache_range(pgtable->pud_ptrs[pgd_idx], IVPU_MMU_PGTABLE_SIZE); - } - clflush_cache_range(pgtable->pgd_dma_ptr, IVPU_MMU_PGTABLE_SIZE); -} - static int ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 prot) @@ -327,6 +336,9 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 prot; u64 i; + if (drm_WARN_ON(&vdev->drm, !ctx)) + return -EINVAL; + if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE)) return -EINVAL; @@ -349,10 +361,11 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, mutex_unlock(&ctx->lock); return ret; } - ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size); vpu_addr += size; } + /* Ensure page table modifications are flushed from wc buffers to memory */ + wmb(); mutex_unlock(&ctx->lock); ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); @@ -369,8 +382,8 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct int ret; u64 i; - if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE)) - ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr); + if (drm_WARN_ON(&vdev->drm, !ctx)) + return; mutex_lock(&ctx->lock); @@ -378,10 +391,11 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct size_t size = sg_dma_len(sg) + sg->offset; ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size); - ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size); vpu_addr += size; } + /* Ensure page table modifications are flushed from wc buffers to memory */ + wmb(); mutex_unlock(&ctx->lock); ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id); @@ -390,28 +404,34 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct } int -ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx, - const struct ivpu_addr_range *range, - u64 size, struct drm_mm_node *node) +ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range, + u64 size, struct drm_mm_node *node) { - lockdep_assert_held(&ctx->lock); + int ret; + + WARN_ON(!range); + mutex_lock(&ctx->lock); if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE) { - if (!drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0, - range->start, range->end, DRM_MM_INSERT_BEST)) - return 0; + ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0, + range->start, range->end, DRM_MM_INSERT_BEST); + if (!ret) + goto unlock; } - return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0, - range->start, range->end, DRM_MM_INSERT_BEST); + ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0, + range->start, range->end, DRM_MM_INSERT_BEST); +unlock: + mutex_unlock(&ctx->lock); + return ret; } void -ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node) +ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node) { - lockdep_assert_held(&ctx->lock); - + mutex_lock(&ctx->lock); drm_mm_remove_node(node); + mutex_unlock(&ctx->lock); } static int @@ -421,7 +441,6 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3 int ret; mutex_init(&ctx->lock); - INIT_LIST_HEAD(&ctx->bo_list); ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable); if (ret) { diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h index f15d8c630d8a..535db3a1fc74 100644 --- a/drivers/accel/ivpu/ivpu_mmu_context.h +++ b/drivers/accel/ivpu/ivpu_mmu_context.h @@ -23,10 +23,9 @@ struct ivpu_mmu_pgtable { }; struct ivpu_mmu_context { - struct mutex lock; /* protects: mm, pgtable, bo_list */ + struct mutex lock; /* Protects: mm, pgtable */ struct drm_mm mm; struct ivpu_mmu_pgtable pgtable; - struct list_head bo_list; u32 id; }; @@ -39,11 +38,9 @@ int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx); void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid); -int ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx, - const struct ivpu_addr_range *range, - u64 size, struct drm_mm_node *node); -void ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, - struct drm_mm_node *node); +int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range, + u64 size, struct drm_mm_node *node); +void ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node); int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u64 vpu_addr, struct sg_table *sgt, bool llc_coherent); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index e9b16cbc26f4..0af8864cb3b5 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -15,6 +15,7 @@ #include "ivpu_fw.h" #include "ivpu_ipc.h" #include "ivpu_job.h" +#include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" #include "ivpu_pm.h" @@ -22,6 +23,10 @@ static bool ivpu_disable_recovery; module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644); MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected"); +static unsigned long ivpu_tdr_timeout_ms; +module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644); +MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); + #define PM_RESCHEDULE_LIMIT 5 static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev) @@ -69,27 +74,31 @@ retry: ret = ivpu_hw_power_up(vdev); if (ret) { ivpu_err(vdev, "Failed to power up HW: %d\n", ret); - return ret; + goto err_power_down; } ret = ivpu_mmu_enable(vdev); if (ret) { ivpu_err(vdev, "Failed to resume MMU: %d\n", ret); - ivpu_hw_power_down(vdev); - return ret; + goto err_power_down; } ret = ivpu_boot(vdev); - if (ret) { - ivpu_mmu_disable(vdev); - ivpu_hw_power_down(vdev); - if (!ivpu_fw_is_cold_boot(vdev)) { - ivpu_warn(vdev, "Failed to resume the FW: %d. Retrying cold boot..\n", ret); - ivpu_pm_prepare_cold_boot(vdev); - goto retry; - } else { - ivpu_err(vdev, "Failed to resume the FW: %d\n", ret); - } + if (ret) + goto err_mmu_disable; + + return 0; + +err_mmu_disable: + ivpu_mmu_disable(vdev); +err_power_down: + ivpu_hw_power_down(vdev); + + if (!ivpu_fw_is_cold_boot(vdev)) { + ivpu_pm_prepare_cold_boot(vdev); + goto retry; + } else { + ivpu_err(vdev, "Failed to resume the FW: %d\n", ret); } return ret; @@ -136,6 +145,31 @@ void ivpu_pm_schedule_recovery(struct ivpu_device *vdev) } } +static void ivpu_job_timeout_work(struct work_struct *work) +{ + struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work); + struct ivpu_device *vdev = pm->vdev; + unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; + + ivpu_err(vdev, "TDR detected, timeout %lu ms", timeout_ms); + ivpu_hw_diagnose_failure(vdev); + + ivpu_pm_schedule_recovery(vdev); +} + +void ivpu_start_job_timeout_detection(struct ivpu_device *vdev) +{ + unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; + + /* No-op if already queued */ + queue_delayed_work(system_wq, &vdev->pm->job_timeout_work, msecs_to_jiffies(timeout_ms)); +} + +void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev) +{ + cancel_delayed_work_sync(&vdev->pm->job_timeout_work); +} + int ivpu_pm_suspend_cb(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); @@ -153,6 +187,8 @@ int ivpu_pm_suspend_cb(struct device *dev) } } + ivpu_jsm_pwr_d0i3_enter(vdev); + ivpu_suspend(vdev); ivpu_pm_prepare_warm_boot(vdev); @@ -188,6 +224,7 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) { struct drm_device *drm = dev_get_drvdata(dev); struct ivpu_device *vdev = to_ivpu_device(drm); + bool hw_is_idle = true; int ret; ivpu_dbg(vdev, PM, "Runtime suspend..\n"); @@ -200,11 +237,16 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev) return -EAGAIN; } + if (!vdev->pm->suspend_reschedule_counter) + hw_is_idle = false; + else if (ivpu_jsm_pwr_d0i3_enter(vdev)) + hw_is_idle = false; + ret = ivpu_suspend(vdev); if (ret) ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret); - if (!vdev->pm->suspend_reschedule_counter) { + if (!hw_is_idle) { ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n"); ivpu_pm_prepare_cold_boot(vdev); } else { @@ -304,6 +346,7 @@ void ivpu_pm_init(struct ivpu_device *vdev) atomic_set(&pm->in_reset, 0); INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work); + INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work); if (ivpu_disable_recovery) delay = -1; @@ -318,6 +361,7 @@ void ivpu_pm_init(struct ivpu_device *vdev) void ivpu_pm_cancel_recovery(struct ivpu_device *vdev) { + drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work)); cancel_work_sync(&vdev->pm->recovery_work); } diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h index 044db150be07..97c6e0b0aa42 100644 --- a/drivers/accel/ivpu/ivpu_pm.h +++ b/drivers/accel/ivpu/ivpu_pm.h @@ -12,6 +12,7 @@ struct ivpu_device; struct ivpu_pm_info { struct ivpu_device *vdev; + struct delayed_work job_timeout_work; struct work_struct recovery_work; atomic_t in_reset; atomic_t reset_counter; @@ -37,5 +38,7 @@ int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev); void ivpu_rpm_put(struct ivpu_device *vdev); void ivpu_pm_schedule_recovery(struct ivpu_device *vdev); +void ivpu_start_job_timeout_detection(struct ivpu_device *vdev); +void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev); #endif /* __IVPU_PM_H__ */ diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h index 6b71be92ba65..04c954258563 100644 --- a/drivers/accel/ivpu/vpu_boot_api.h +++ b/drivers/accel/ivpu/vpu_boot_api.h @@ -11,7 +11,10 @@ * The bellow values will be used to construct the version info this way: * fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) | * VPU_BOOT_API_VER_MINOR; - * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes. + * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes + * This information is collected by using vpuip_2/application/vpuFirmware/make_std_fw_image.py + * If a header is missing this info we ignore the header, if a header is missing or contains + * partial info a build error will be generated. */ /* @@ -24,12 +27,12 @@ * Minor version changes when API backward compatibility is preserved. * Resets to 0 if Major version is incremented. */ -#define VPU_BOOT_API_VER_MINOR 12 +#define VPU_BOOT_API_VER_MINOR 20 /* * API header changed (field names, documentation, formatting) but API itself has not been changed */ -#define VPU_BOOT_API_VER_PATCH 2 +#define VPU_BOOT_API_VER_PATCH 4 /* * Index in the API version table @@ -63,6 +66,12 @@ struct vpu_firmware_header { /* Size of memory require for firmware execution */ u32 runtime_size; u32 shave_nn_fw_size; + /* Size of primary preemption buffer. */ + u32 preemption_buffer_1_size; + /* Size of secondary preemption buffer. */ + u32 preemption_buffer_2_size; + /* Space reserved for future preemption-related fields. */ + u32 preemption_reserved[6]; }; /* @@ -89,6 +98,14 @@ enum VPU_BOOT_L2_CACHE_CFG_TYPE { VPU_BOOT_L2_CACHE_CFG_NUM = 2 }; +/** VPU MCA ECC signalling mode. By default, no signalling is used */ +enum VPU_BOOT_MCA_ECC_SIGNAL_TYPE { + VPU_BOOT_MCA_ECC_NONE = 0, + VPU_BOOT_MCA_ECC_CORR = 1, + VPU_BOOT_MCA_ECC_FATAL = 2, + VPU_BOOT_MCA_ECC_BOTH = 3 +}; + /** * Logging destinations. * @@ -131,9 +148,11 @@ enum vpu_trace_destination { #define VPU_TRACE_PROC_BIT_ACT_SHV_3 22 #define VPU_TRACE_PROC_NO_OF_HW_DEVS 23 -/* KMB HW component IDs are sequential, so define first and last IDs. */ -#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_LRT -#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_SHV_15 +/* VPU 30xx HW component IDs are sequential, so define first and last IDs. */ +#define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT +#define VPU_TRACE_PROC_BIT_30XX_LAST VPU_TRACE_PROC_BIT_SHV_15 +#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_30XX_FIRST +#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_30XX_LAST struct vpu_boot_l2_cache_config { u8 use; @@ -148,6 +167,25 @@ struct vpu_warm_boot_section { u32 is_clear_op; }; +/* + * When HW scheduling mode is enabled, a present period is defined. + * It will be used by VPU to swap between normal and focus priorities + * to prevent starving of normal priority band (when implemented). + * Host must provide a valid value at boot time in + * `vpu_focus_present_timer_ms`. If the value provided by the host is not within the + * defined range a default value will be used. Here we define the min. and max. + * allowed values and the and default value of the present period. Units are milliseconds. + */ +#define VPU_PRESENT_CALL_PERIOD_MS_DEFAULT 50 +#define VPU_PRESENT_CALL_PERIOD_MS_MIN 16 +#define VPU_PRESENT_CALL_PERIOD_MS_MAX 10000 + +/** + * Macros to enable various operation modes within the VPU. + * To be defined as part of 32 bit mask. + */ +#define VPU_OP_MODE_SURVIVABILITY 0x1 + struct vpu_boot_params { u32 magic; u32 vpu_id; @@ -218,6 +256,7 @@ struct vpu_boot_params { * the threshold will not be logged); applies to every enabled logging * destination and loggable HW component. See 'mvLog_t' enum for acceptable * values. + * TODO: EISW-33556: Move log level definition (mvLog_t) to this file. */ u32 default_trace_level; u32 boot_type; @@ -249,7 +288,36 @@ struct vpu_boot_params { u32 temp_sensor_period_ms; /** PLL ratio for efficient clock frequency */ u32 pn_freq_pll_ratio; - u32 pad4[28]; + /** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */ + u32 dvfs_mode; + /** + * Depending on DVFS Mode: + * On-demand: Default if 0. + * Bit 0-7 - uint8_t: Highest residency percent + * Bit 8-15 - uint8_t: High residency percent + * Bit 16-23 - uint8_t: Low residency percent + * Bit 24-31 - uint8_t: Lowest residency percent + * Bit 32-35 - unsigned 4b: PLL Ratio increase amount on highest residency + * Bit 36-39 - unsigned 4b: PLL Ratio increase amount on high residency + * Bit 40-43 - unsigned 4b: PLL Ratio decrease amount on low residency + * Bit 44-47 - unsigned 4b: PLL Ratio decrease amount on lowest frequency + * Bit 48-55 - uint8_t: Period (ms) for residency decisions + * Bit 56-63 - uint8_t: Averaging windows (as multiples of period. Max: 30 decimal) + * Power Save/Max Performance: Unused + */ + u64 dvfs_param; + /** + * D0i3 delayed entry + * Bit0: Disable CPU state save on D0i2 entry flow. + * 0: Every D0i2 entry saves state. Save state IPC message ignored. + * 1: IPC message required to save state on D0i3 entry flow. + */ + u32 d0i3_delayed_entry; + /* Time spent by VPU in D0i3 state */ + u64 d0i3_residency_time_us; + /* Value of VPU perf counter at the time of entering D0i3 state . */ + u64 d0i3_entry_vpu_ts; + u32 pad4[20]; /* Warm boot information: 0x400 - 0x43F */ u32 warm_boot_sections_count; u32 warm_boot_start_address_reference; @@ -274,8 +342,12 @@ struct vpu_boot_params { u32 vpu_scheduling_mode; /* Present call period in milliseconds. */ u32 vpu_focus_present_timer_ms; - /* Unused/reserved: 0x478 - 0xFFF */ - u32 pad6[738]; + /* VPU ECC Signaling */ + u32 vpu_uses_ecc_mca_signal; + /* Values defined by VPU_OP_MODE* macros */ + u32 vpu_operation_mode; + /* Unused/reserved: 0x480 - 0xFFF */ + u32 pad6[736]; }; /* diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h index 2949ec8365bd..7da7622742be 100644 --- a/drivers/accel/ivpu/vpu_jsm_api.h +++ b/drivers/accel/ivpu/vpu_jsm_api.h @@ -22,12 +22,12 @@ /* * Minor version changes when API backward compatibility is preserved. */ -#define VPU_JSM_API_VER_MINOR 0 +#define VPU_JSM_API_VER_MINOR 15 /* * API header changed (field names, documentation, formatting) but API itself has not been changed */ -#define VPU_JSM_API_VER_PATCH 1 +#define VPU_JSM_API_VER_PATCH 0 /* * Index in the API version table @@ -84,11 +84,13 @@ * Job flags bit masks. */ #define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001 +#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK 0xFF000000 /* * Sizes of the reserved areas in jobs, in bytes. */ -#define VPU_JOB_RESERVED_BYTES 16 +#define VPU_JOB_RESERVED_BYTES 8 + /* * Sizes of the reserved areas in job queues, in bytes. */ @@ -109,6 +111,20 @@ #define VPU_DYNDBG_CMD_MAX_LEN 96 /* + * For HWS command queue scheduling, we can prioritise command queues inside the + * same process with a relative in-process priority. Valid values for relative + * priority are given below - max and min. + */ +#define VPU_HWS_COMMAND_QUEUE_MAX_IN_PROCESS_PRIORITY 7 +#define VPU_HWS_COMMAND_QUEUE_MIN_IN_PROCESS_PRIORITY -7 + +/* + * For HWS priority scheduling, we can have multiple realtime priority bands. + * They are numbered 0 to a MAX. + */ +#define VPU_HWS_MAX_REALTIME_PRIORITY_LEVEL 31U + +/* * Job format. */ struct vpu_job_queue_entry { @@ -117,8 +133,14 @@ struct vpu_job_queue_entry { u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */ u64 root_page_table_addr; /**< Address of root page table to use for this job */ u64 root_page_table_update_counter; /**< Page tables update events counter */ - u64 preemption_buffer_address; /**< Address of the preemption buffer to use for this job */ - u64 preemption_buffer_size; /**< Size of the preemption buffer to use for this job */ + u64 primary_preempt_buf_addr; + /**< Address of the primary preemption buffer to use for this job */ + u32 primary_preempt_buf_size; + /**< Size of the primary preemption buffer to use for this job */ + u32 secondary_preempt_buf_size; + /**< Size of secondary preemption buffer to use for this job */ + u64 secondary_preempt_buf_addr; + /**< Address of secondary preemption buffer to use for this job */ u8 reserved_0[VPU_JOB_RESERVED_BYTES]; }; @@ -153,6 +175,46 @@ enum vpu_trace_entity_type { }; /* + * HWS specific log buffer header details. + * Total size is 32 bytes. + */ +struct vpu_hws_log_buffer_header { + /* Written by VPU after adding a log entry. Initialised by host to 0. */ + u32 first_free_entry_index; + /* Incremented by VPU every time the VPU overwrites the 0th entry; + * initialised by host to 0. + */ + u32 wraparound_count; + /* + * This is the number of buffers that can be stored in the log buffer provided by the host. + * It is written by host before passing buffer to VPU. VPU should consider it read-only. + */ + u64 num_of_entries; + u64 reserved[2]; +}; + +/* + * HWS specific log buffer entry details. + * Total size is 32 bytes. + */ +struct vpu_hws_log_buffer_entry { + /* VPU timestamp must be an invariant timer tick (not impacted by DVFS) */ + u64 vpu_timestamp; + /* + * Operation type: + * 0 - context state change + * 1 - queue new work + * 2 - queue unwait sync object + * 3 - queue no more work + * 4 - queue wait sync object + */ + u32 operation_type; + u32 reserved; + /* Operation data depends on operation type */ + u64 operation_data[2]; +}; + +/* * Host <-> VPU IPC messages types. */ enum vpu_ipc_msg_type { @@ -228,6 +290,23 @@ enum vpu_ipc_msg_type { * deallocated or reassigned to another context. */ VPU_JSM_MSG_HWS_REGISTER_DB = 0x1117, + /** Control command: Log buffer setting */ + VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG = 0x1118, + /* Control command: Suspend command queue. */ + VPU_JSM_MSG_HWS_SUSPEND_CMDQ = 0x1119, + /* Control command: Resume command queue */ + VPU_JSM_MSG_HWS_RESUME_CMDQ = 0x111a, + /* Control command: Resume engine after reset */ + VPU_JSM_MSG_HWS_ENGINE_RESUME = 0x111b, + /* Control command: Enable survivability/DCT mode */ + VPU_JSM_MSG_DCT_ENABLE = 0x111c, + /* Control command: Disable survivability/DCT mode */ + VPU_JSM_MSG_DCT_DISABLE = 0x111d, + /** + * Dump VPU state. To be used for debug purposes only. + * NOTE: Please introduce new ASYNC commands before this one. * + */ + VPU_JSM_MSG_STATE_DUMP = 0x11FF, /* IPC Host -> Device, General commands */ VPU_JSM_MSG_GENERAL_CMD = 0x1200, VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD, @@ -236,6 +315,10 @@ enum vpu_ipc_msg_type { * Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`. */ VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201, + /** + * Perform the save procedure for the D0i3 entry + */ + VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202, /* IPC Device -> Host, Job completion */ VPU_JSM_MSG_JOB_DONE = 0x2100, /* IPC Device -> Host, Async command completion */ @@ -304,11 +387,35 @@ enum vpu_ipc_msg_type { VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP = 0x2216, /** Response to control command: Set context scheduling properties */ VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP = 0x2217, + /** Response to control command: Log buffer setting */ + VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP = 0x2218, + /* IPC Device -> Host, HWS notify index entry of log buffer written */ + VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION = 0x2219, + /* IPC Device -> Host, HWS completion of a context suspend request */ + VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE = 0x221a, + /* Response to control command: Resume command queue */ + VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP = 0x221b, + /* Response to control command: Resume engine command response */ + VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE = 0x221c, + /* Response to control command: Enable survivability/DCT mode */ + VPU_JSM_MSG_DCT_ENABLE_DONE = 0x221d, + /* Response to control command: Disable survivability/DCT mode */ + VPU_JSM_MSG_DCT_DISABLE_DONE = 0x221e, + /** + * Response to state dump control command. + * NOTE: Please introduce new ASYNC responses before this one. * + */ + VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF, /* IPC Device -> Host, General command completion */ VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300, VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE, /** Response to VPU_JSM_MSG_DYNDBG_CONTROL. */ VPU_JSM_MSG_DYNDBG_CONTROL_RSP = 0x2301, + /** + * Acknowledgment of completion of the save procedure initiated by + * VPU_JSM_MSG_PWR_D0I3_ENTER + */ + VPU_JSM_MSG_PWR_D0I3_ENTER_DONE = 0x2302, }; enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED }; @@ -593,12 +700,12 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup { * Default quantum in 100ns units for scheduling across processes * within a priority band */ - u64 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; + u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS]; /* * Default grace period in 100ns units for processes that preempt each * other within a priority band */ - u64 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; + u32 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS]; /* * For normal priority band, specifies the target VPU percentage * in situations when it's starved by the focus band. @@ -608,32 +715,51 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup { u32 reserved_0; }; -/* HWS create command queue request */ +/* + * @brief HWS create command queue request. + * Host will create a command queue via this command. + * Note: Cmdq group is a handle of an object which + * may contain one or more command queues. + * @see VPU_JSM_MSG_CREATE_CMD_QUEUE + * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP + */ struct vpu_ipc_msg_payload_hws_create_cmdq { /* Process id */ u64 process_id; /* Host SSID */ u32 host_ssid; - /* Zero Padding */ - u32 reserved; + /* Engine for which queue is being created */ + u32 engine_idx; + /* + * Cmdq group may be set to 0 or equal to + * cmdq_id while each priority band contains + * only single engine instances. + */ + u64 cmdq_group; /* Command queue id */ u64 cmdq_id; /* Command queue base */ u64 cmdq_base; /* Command queue size */ u32 cmdq_size; - /* Reserved */ + /* Zero padding */ u32 reserved_0; }; -/* HWS create command queue response */ +/* + * @brief HWS create command queue response. + * @see VPU_JSM_MSG_CREATE_CMD_QUEUE + * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP + */ struct vpu_ipc_msg_payload_hws_create_cmdq_rsp { /* Process id */ u64 process_id; /* Host SSID */ u32 host_ssid; - /* Zero Padding */ - u32 reserved; + /* Engine for which queue is being created */ + u32 engine_idx; + /* Command queue group */ + u64 cmdq_group; /* Command queue id */ u64 cmdq_id; }; @@ -661,7 +787,7 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties { /* Inside realtime band assigns a further priority */ u32 realtime_priority_level; /* Priority relative to other contexts in the same process */ - u32 in_process_priority; + s32 in_process_priority; /* Zero padding / Reserved */ u32 reserved_1; /* Context quantum relative to other contexts of same priority in the same process */ @@ -694,6 +820,123 @@ struct vpu_jsm_hws_register_db { u64 cmdq_size; }; +/* + * @brief Structure to set another buffer to be used for scheduling-related logging. + * The size of the logging buffer and the number of entries is defined as part of the + * buffer itself as described next. + * The log buffer received from the host is made up of; + * - header: 32 bytes in size, as shown in 'struct vpu_hws_log_buffer_header'. + * The header contains the number of log entries in the buffer. + * - log entry: 0 to n-1, each log entry is 32 bytes in size, as shown in + * 'struct vpu_hws_log_buffer_entry'. + * The entry contains the VPU timestamp, operation type and data. + * The host should provide the notify index value of log buffer to VPU. This is a + * value defined within the log buffer and when written to will generate the + * scheduling log notification. + * The host should set engine_idx and vpu_log_buffer_va to 0 to disable logging + * for a particular engine. + * VPU will handle one log buffer for each of supported engines. + * VPU should allow the logging to consume one host_ssid. + * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG + * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP + * @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION + */ +struct vpu_ipc_msg_payload_hws_set_scheduling_log { + /* Engine ordinal */ + u32 engine_idx; + /* Host SSID */ + u32 host_ssid; + /* + * VPU log buffer virtual address. + * Set to 0 to disable logging for this engine. + */ + u64 vpu_log_buffer_va; + /* + * Notify index of log buffer. VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION + * is generated when an event log is written to this index. + */ + u64 notify_index; +}; + +/* + * @brief The scheduling log notification is generated by VPU when it writes + * an event into the log buffer at the notify_index. VPU notifies host with + * VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION. This is an asynchronous + * message from VPU to host. + * @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION + * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG + */ +struct vpu_ipc_msg_payload_hws_scheduling_log_notification { + /* Engine ordinal */ + u32 engine_idx; + /* Zero Padding */ + u32 reserved_0; +}; + +/* + * @brief HWS suspend command queue request and done structure. + * Host will request the suspend of contexts and VPU will; + * - Suspend all work on this context + * - Preempt any running work + * - Asynchronously perform the above and return success immediately once + * all items above are started successfully + * - Notify the host of completion of these operations via + * VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE + * - Reject any other context operations on a context with an in-flight + * suspend request running + * Same structure used when VPU notifies host of completion of a context suspend + * request. The ids and suspend fence value reported in this command will match + * the one in the request from the host to suspend the context. Once suspend is + * complete, VPU will not access any data relating to this command queue until + * it is resumed. + * @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ + * @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE + */ +struct vpu_ipc_msg_payload_hws_suspend_cmdq { + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved_0; + /* Command queue id */ + u64 cmdq_id; + /* + * Suspend fence value - reported by the VPU suspend context + * completed once suspend is complete. + */ + u64 suspend_fence_value; +}; + +/* + * @brief HWS Resume command queue request / response structure. + * Host will request the resume of a context; + * - VPU will resume all work on this context + * - Scheduler will allow this context to be scheduled + * @see VPU_JSM_MSG_HWS_RESUME_CMDQ + * @see VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP + */ +struct vpu_ipc_msg_payload_hws_resume_cmdq { + /* Host SSID */ + u32 host_ssid; + /* Zero Padding */ + u32 reserved_0; + /* Command queue id */ + u64 cmdq_id; +}; + +/* + * @brief HWS Resume engine request / response structure. + * After a HWS engine reset, all scheduling is stopped on VPU until a engine resume. + * Host shall send this command to resume scheduling of any valid queue. + * @see VPU_JSM_MSG_HWS_RESUME_ENGINE + * @see VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE + */ +struct vpu_ipc_msg_payload_hws_resume_engine { + /* Engine to be resumed */ + u32 engine_idx; + /* Reserved */ + u32 reserved_0; +}; + /** * Payload for VPU_JSM_MSG_TRACE_SET_CONFIG[_RSP] and * VPU_JSM_MSG_TRACE_GET_CONFIG_RSP messages. @@ -938,6 +1181,35 @@ struct vpu_ipc_msg_payload_dyndbg_control { char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN]; }; +/** + * Payload for VPU_JSM_MSG_PWR_D0I3_ENTER + * + * This is a bi-directional payload. + */ +struct vpu_ipc_msg_payload_pwr_d0i3_enter { + /** + * 0: VPU_JSM_MSG_PWR_D0I3_ENTER_DONE is not sent to the host driver + * The driver will poll for D0i2 Idle state transitions. + * 1: VPU_JSM_MSG_PWR_D0I3_ENTER_DONE is sent after VPU state save is complete + */ + u32 send_response; + u32 reserved_0; +}; + +/** + * Payload for VPU_JSM_MSG_DCT_ENABLE message. + * + * Default values for DCT active/inactive times are 5.3ms and 30ms respectively, + * corresponding to a 85% duty cycle. This payload allows the host to tune these + * values according to application requirements. + */ +struct vpu_ipc_msg_payload_pwr_dct_control { + /** Duty cycle active time in microseconds */ + u32 dct_active_us; + /** Duty cycle inactive time in microseconds */ + u32 dct_inactive_us; +}; + /* * Payloads union, used to define complete message format. */ @@ -974,6 +1246,13 @@ union vpu_ipc_msg_payload { struct vpu_ipc_msg_payload_hws_destroy_cmdq hws_destroy_cmdq; struct vpu_ipc_msg_payload_hws_set_context_sched_properties hws_set_context_sched_properties; + struct vpu_ipc_msg_payload_hws_set_scheduling_log hws_set_scheduling_log; + struct vpu_ipc_msg_payload_hws_scheduling_log_notification hws_scheduling_log_notification; + struct vpu_ipc_msg_payload_hws_suspend_cmdq hws_suspend_cmdq; + struct vpu_ipc_msg_payload_hws_resume_cmdq hws_resume_cmdq; + struct vpu_ipc_msg_payload_hws_resume_engine hws_resume_engine; + struct vpu_ipc_msg_payload_pwr_d0i3_enter pwr_d0i3_enter; + struct vpu_ipc_msg_payload_pwr_dct_control pwr_dct_control; }; /* |