summary | refs | log | tree | commit | diff
path: root/drivers/accel/ivpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/accel/ivpu')
-rw-r--r-- drivers/accel/ivpu/Kconfig 11
-rw-r--r-- drivers/accel/ivpu/ivpu_debugfs.c 57
-rw-r--r-- drivers/accel/ivpu/ivpu_drv.c 49
-rw-r--r-- drivers/accel/ivpu/ivpu_drv.h 18
-rw-r--r-- drivers/accel/ivpu/ivpu_fw.c 79
-rw-r--r-- drivers/accel/ivpu/ivpu_fw.h 1
-rw-r--r-- drivers/accel/ivpu/ivpu_gem.c 678
-rw-r--r-- drivers/accel/ivpu/ivpu_gem.h 75
-rw-r--r-- drivers/accel/ivpu/ivpu_hw.h 20
-rw-r--r-- drivers/accel/ivpu/ivpu_hw_37xx.c 59
-rw-r--r-- drivers/accel/ivpu/ivpu_hw_37xx_reg.h 2
-rw-r--r-- drivers/accel/ivpu/ivpu_hw_40xx.c 69
-rw-r--r-- drivers/accel/ivpu/ivpu_ipc.c 251
-rw-r--r-- drivers/accel/ivpu/ivpu_ipc.h 33
-rw-r--r-- drivers/accel/ivpu/ivpu_job.c 99
-rw-r--r-- drivers/accel/ivpu/ivpu_job.h 4
-rw-r--r-- drivers/accel/ivpu/ivpu_jsm_msg.c 38
-rw-r--r-- drivers/accel/ivpu/ivpu_jsm_msg.h 1
-rw-r--r-- drivers/accel/ivpu/ivpu_mmu.c 44
-rw-r--r-- drivers/accel/ivpu/ivpu_mmu_context.c 153
-rw-r--r-- drivers/accel/ivpu/ivpu_mmu_context.h 11
-rw-r--r-- drivers/accel/ivpu/ivpu_pm.c 72
-rw-r--r-- drivers/accel/ivpu/ivpu_pm.h 3
-rw-r--r-- drivers/accel/ivpu/vpu_boot_api.h 90
-rw-r--r-- drivers/accel/ivpu/vpu_jsm_api.h 309
25 files changed, 1326 insertions, 900 deletions
diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig
index 1a4c4ed9d113..682c53245286 100644
--- a/drivers/accel/ivpu/Kconfig
+++ b/drivers/accel/ivpu/Kconfig
@@ -1,16 +1,17 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_ACCEL_IVPU
- tristate "Intel VPU for Meteor Lake and newer"
+ tristate "Intel NPU (Neural Processing Unit)"
depends on DRM_ACCEL
depends on X86_64 && !UML
depends on PCI && PCI_MSI
select FW_LOADER
- select SHMEM
+ select DRM_GEM_SHMEM_HELPER
select GENERIC_ALLOCATOR
help
- Choose this option if you have a system that has an 14th generation Intel CPU
- or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated
- inference accelerator for Computer Vision and Deep Learning applications.
+ Choose this option if you have a system with a 14th generation
+ Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU)
+ is a CPU-integrated inference accelerator for Computer Vision
+ and Deep Learning applications.
If "M" is selected, the module will be called intel_vpu.
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index ea453b985b49..19035230563d 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -14,6 +14,7 @@
#include "ivpu_fw.h"
#include "ivpu_fw_log.h"
#include "ivpu_gem.h"
+#include "ivpu_hw.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_pm.h"
@@ -115,6 +116,31 @@ static const struct drm_debugfs_info vdev_debugfs_list[] = {
{"reset_pending", reset_pending_show, 0},
};
+static ssize_t
+dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+{
+ struct ivpu_device *vdev = file->private_data;
+ struct ivpu_fw_info *fw = vdev->fw;
+ u32 dvfs_mode;
+ int ret;
+
+ ret = kstrtou32_from_user(user_buf, size, 0, &dvfs_mode);
+ if (ret < 0)
+ return ret;
+
+ fw->dvfs_mode = dvfs_mode;
+
+ ivpu_pm_schedule_recovery(vdev);
+
+ return size;
+}
+
+static const struct file_operations dvfs_mode_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = dvfs_mode_fops_write,
+};
+
static int fw_log_show(struct seq_file *s, void *v)
{
struct ivpu_device *vdev = s->private;
@@ -152,6 +178,30 @@ static const struct file_operations fw_log_fops = {
};
static ssize_t
+fw_profiling_freq_fops_write(struct file *file, const char __user *user_buf,
+ size_t size, loff_t *pos)
+{
+ struct ivpu_device *vdev = file->private_data;
+ bool enable;
+ int ret;
+
+ ret = kstrtobool_from_user(user_buf, size, &enable);
+ if (ret < 0)
+ return ret;
+
+ ivpu_hw_profiling_freq_drive(vdev, enable);
+ ivpu_pm_schedule_recovery(vdev);
+
+ return size;
+}
+
+static const struct file_operations fw_profiling_freq_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = fw_profiling_freq_fops_write,
+};
+
+static ssize_t
fw_trace_destination_mask_fops_write(struct file *file, const char __user *user_buf,
size_t size, loff_t *pos)
{
@@ -280,6 +330,9 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
debugfs_create_file("force_recovery", 0200, debugfs_root, vdev,
&ivpu_force_recovery_fops);
+ debugfs_create_file("dvfs_mode", 0200, debugfs_root, vdev,
+ &dvfs_mode_fops);
+
debugfs_create_file("fw_log", 0644, debugfs_root, vdev,
&fw_log_fops);
debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev,
@@ -291,4 +344,8 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
&ivpu_reset_engine_fops);
+
+ if (ivpu_hw_gen(vdev) >= IVPU_HW_40XX)
+ debugfs_create_file("fw_profiling_freq_drive", 0200,
+ debugfs_root, vdev, &fw_profiling_freq_fops);
}
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 790603017653..64927682161b 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -31,8 +31,6 @@
__stringify(DRM_IVPU_DRIVER_MINOR) "."
#endif
-static const struct drm_driver driver;
-
static struct lock_class_key submitted_jobs_xa_lock_class_key;
int ivpu_dbg_mask;
@@ -41,7 +39,7 @@ MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
int ivpu_test_mode;
module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644);
-MODULE_PARM_DESC(test_mode, "Test mode: 0 - normal operation, 1 - fw unit test, 2 - null hw");
+MODULE_PARM_DESC(test_mode, "Test mode mask. See IVPU_TEST_MODE_* macros.");
u8 ivpu_pll_min_ratio;
module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
@@ -93,8 +91,8 @@ static void file_priv_release(struct kref *ref)
ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id);
ivpu_cmdq_release_all(file_priv);
- ivpu_bo_remove_all_bos_from_context(&file_priv->ctx);
ivpu_jsm_context_release(vdev, file_priv->ctx.id);
+ ivpu_bo_remove_all_bos_from_context(vdev, &file_priv->ctx);
ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv);
mutex_destroy(&file_priv->lock);
@@ -317,16 +315,14 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
unsigned long timeout;
int ret;
- if (ivpu_test_mode == IVPU_TEST_MODE_FW_TEST)
+ if (ivpu_test_mode & IVPU_TEST_MODE_FW_TEST)
return 0;
- ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG);
+ ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG, NULL);
timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot);
while (1) {
- ret = ivpu_ipc_irq_handler(vdev);
- if (ret)
- break;
+ ivpu_ipc_irq_handler(vdev, NULL);
ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0);
if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout))
break;
@@ -362,7 +358,7 @@ int ivpu_boot(struct ivpu_device *vdev)
int ret;
/* Update boot params located at first 4KB of FW memory */
- ivpu_fw_boot_params_setup(vdev, vdev->fw->mem->kvaddr);
+ ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem));
ret = ivpu_hw_boot_fw(vdev);
if (ret) {
@@ -414,7 +410,9 @@ static const struct drm_driver driver = {
.open = ivpu_open,
.postclose = ivpu_postclose,
- .gem_prime_import = ivpu_gem_prime_import,
+
+ .gem_create_object = ivpu_gem_create_object,
+ .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
.ioctls = ivpu_drm_ioctls,
.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
@@ -427,6 +425,13 @@ static const struct drm_driver driver = {
.minor = DRM_IVPU_DRIVER_MINOR,
};
+static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
+{
+ struct ivpu_device *vdev = arg;
+
+ return ivpu_ipc_irq_thread_handler(vdev);
+}
+
static int ivpu_irq_init(struct ivpu_device *vdev)
{
struct pci_dev *pdev = to_pci_dev(vdev->drm.dev);
@@ -440,8 +445,8 @@ static int ivpu_irq_init(struct ivpu_device *vdev)
vdev->irq = pci_irq_vector(pdev, 0);
- ret = devm_request_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
- IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
+ ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
+ ivpu_irq_thread_handler, IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
if (ret)
ivpu_err(vdev, "Failed to request an IRQ %d\n", ret);
@@ -533,6 +538,11 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
+ INIT_LIST_HEAD(&vdev->bo_list);
+
+ ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock);
+ if (ret)
+ goto err_xa_destroy;
ret = ivpu_pci_init(vdev);
if (ret)
@@ -550,7 +560,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
/* Power up early so the rest of init code can access VPU registers */
ret = ivpu_hw_power_up(vdev);
if (ret)
- goto err_xa_destroy;
+ goto err_power_down;
ret = ivpu_mmu_global_context_init(vdev);
if (ret)
@@ -574,20 +584,15 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
ivpu_pm_init(vdev);
- ret = ivpu_job_done_thread_init(vdev);
- if (ret)
- goto err_ipc_fini;
-
ret = ivpu_boot(vdev);
if (ret)
- goto err_job_done_thread_fini;
+ goto err_ipc_fini;
+ ivpu_job_done_consumer_init(vdev);
ivpu_pm_enable(vdev);
return 0;
-err_job_done_thread_fini:
- ivpu_job_done_thread_fini(vdev);
err_ipc_fini:
ivpu_ipc_fini(vdev);
err_fw_fini:
@@ -612,7 +617,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
ivpu_shutdown(vdev);
if (IVPU_WA(d3hot_after_power_off))
pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
- ivpu_job_done_thread_fini(vdev);
+ ivpu_job_done_consumer_fini(vdev);
ivpu_pm_cancel_recovery(vdev);
ivpu_ipc_fini(vdev);
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 417ddeca8517..ebc4b84f27b2 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -17,9 +17,10 @@
#include <uapi/drm/ivpu_accel.h>
#include "ivpu_mmu_context.h"
+#include "ivpu_ipc.h"
#define DRIVER_NAME "intel_vpu"
-#define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)"
+#define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)"
#define DRIVER_DATE "20230117"
#define PCI_DEVICE_ID_MTL 0x7d1d
@@ -88,6 +89,7 @@ struct ivpu_wa_table {
bool d3hot_after_power_off;
bool interrupt_clear_with_0;
bool disable_clock_relinquish;
+ bool disable_d0i3_msg;
};
struct ivpu_hw_info;
@@ -115,8 +117,11 @@ struct ivpu_device {
struct xarray context_xa;
struct xa_limit context_xa_limit;
+ struct mutex bo_list_lock; /* Protects bo_list */
+ struct list_head bo_list;
+
struct xarray submitted_jobs_xa;
- struct task_struct *job_done_thread;
+ struct ivpu_ipc_consumer job_done_consumer;
atomic64_t unique_id_counter;
@@ -126,6 +131,7 @@ struct ivpu_device {
int tdr;
int reschedule_suspend;
int autosuspend;
+ int d0i3_entry_msg;
} timeout;
};
@@ -148,9 +154,11 @@ extern u8 ivpu_pll_min_ratio;
extern u8 ivpu_pll_max_ratio;
extern bool ivpu_disable_mmu_cont_pages;
-#define IVPU_TEST_MODE_DISABLED 0
-#define IVPU_TEST_MODE_FW_TEST 1
-#define IVPU_TEST_MODE_NULL_HW 2
+#define IVPU_TEST_MODE_FW_TEST BIT(0)
+#define IVPU_TEST_MODE_NULL_HW BIT(1)
+#define IVPU_TEST_MODE_NULL_SUBMISSION BIT(2)
+#define IVPU_TEST_MODE_D0I3_MSG_DISABLE BIT(4)
+#define IVPU_TEST_MODE_D0I3_MSG_ENABLE BIT(5)
extern int ivpu_test_mode;
struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 691da521dde5..6576232f3e67 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -33,12 +33,17 @@
#define ADDR_TO_L2_CACHE_CFG(addr) ((addr) >> 31)
-#define IVPU_FW_CHECK_API(vdev, fw_hdr, name, min_major) \
+/* Check if FW API is compatible with the driver */
+#define IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, name, min_major) \
ivpu_fw_check_api(vdev, fw_hdr, #name, \
VPU_##name##_API_VER_INDEX, \
VPU_##name##_API_VER_MAJOR, \
VPU_##name##_API_VER_MINOR, min_major)
+/* Check if API version is lower than the given version */
+#define IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, name, major, minor) \
+ ivpu_fw_check_api_ver_lt(vdev, fw_hdr, #name, VPU_##name##_API_VER_INDEX, major, minor)
+
static char *ivpu_firmware;
module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644);
MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/..");
@@ -105,6 +110,19 @@ ivpu_fw_check_api(struct ivpu_device *vdev, const struct vpu_firmware_header *fw
return 0;
}
+static bool
+ivpu_fw_check_api_ver_lt(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr,
+ const char *str, int index, u16 major, u16 minor)
+{
+ u16 fw_major = (u16)(fw_hdr->api_version[index] >> 16);
+ u16 fw_minor = (u16)(fw_hdr->api_version[index]);
+
+ if (fw_major < major || (fw_major == major && fw_minor < minor))
+ return true;
+
+ return false;
+}
+
static int ivpu_fw_parse(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
@@ -164,9 +182,9 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
ivpu_info(vdev, "Firmware: %s, version: %s", fw->name,
(const char *)fw_hdr + VPU_FW_HEADER_SIZE);
- if (IVPU_FW_CHECK_API(vdev, fw_hdr, BOOT, 3))
+ if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3))
return -EINVAL;
- if (IVPU_FW_CHECK_API(vdev, fw_hdr, JSM, 3))
+ if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, JSM, 3))
return -EINVAL;
fw->runtime_addr = runtime_addr;
@@ -182,6 +200,8 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING;
fw->trace_hw_component_mask = -1;
+ fw->dvfs_mode = 0;
+
ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n",
fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size);
ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n",
@@ -195,6 +215,24 @@ static void ivpu_fw_release(struct ivpu_device *vdev)
release_firmware(vdev->fw->file);
}
+/* Initialize workarounds that depend on FW version */
+static void
+ivpu_fw_init_wa(struct ivpu_device *vdev)
+{
+ const struct vpu_firmware_header *fw_hdr = (const void *)vdev->fw->file->data;
+
+ if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, BOOT, 3, 17) ||
+ (ivpu_hw_gen(vdev) > IVPU_HW_37XX) ||
+ (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_DISABLE))
+ vdev->wa.disable_d0i3_msg = true;
+
+ /* Force enable the feature for testing purposes */
+ if (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_ENABLE)
+ vdev->wa.disable_d0i3_msg = false;
+
+ IVPU_PRINT_WA(disable_d0i3_msg);
+}
+
static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
@@ -248,7 +286,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
if (fw->shave_nn_size) {
fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.shave.start,
- fw->shave_nn_size, DRM_IVPU_BO_UNCACHED);
+ fw->shave_nn_size, DRM_IVPU_BO_WC);
if (!fw->mem_shave_nn) {
ivpu_err(vdev, "Failed to allocate shavenn buffer\n");
ret = -ENOMEM;
@@ -297,6 +335,8 @@ int ivpu_fw_init(struct ivpu_device *vdev)
if (ret)
goto err_fw_release;
+ ivpu_fw_init_wa(vdev);
+
ret = ivpu_fw_mem_init(vdev);
if (ret)
goto err_fw_release;
@@ -422,14 +462,31 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
boot_params->punit_telemetry_sram_size);
ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n",
boot_params->vpu_telemetry_enable);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.dvfs_mode = %u\n",
+ boot_params->dvfs_mode);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_delayed_entry = %d\n",
+ boot_params->d0i3_delayed_entry);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
+ boot_params->d0i3_residency_time_us);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
+ boot_params->d0i3_entry_vpu_ts);
}
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
{
struct ivpu_bo *ipc_mem_rx = vdev->ipc->mem_rx;
- /* In case of warm boot we only have to reset the entrypoint addr */
+ /* In case of warm boot only update variable params */
if (!ivpu_fw_is_cold_boot(vdev)) {
+ boot_params->d0i3_residency_time_us =
+ ktime_us_delta(ktime_get_boottime(), vdev->hw->d0i3_entry_host_ts);
+ boot_params->d0i3_entry_vpu_ts = vdev->hw->d0i3_entry_vpu_ts;
+
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
+ boot_params->d0i3_residency_time_us);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
+ boot_params->d0i3_entry_vpu_ts);
+
boot_params->save_restore_ret_address = 0;
vdev->pm->is_warmboot = true;
wmb(); /* Flush WC buffers after writing save_restore_ret_address */
@@ -443,6 +500,13 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev);
/*
+ * This param is a debug firmware feature. It switches the default clock
+ * to a higher resolution one for more fine-grained and accurate firmware
+ * task profiling.
+ */
+ boot_params->perf_clk_frequency = ivpu_hw_profiling_freq_get(vdev);
+
+ /*
* Uncached region of VPU address space, covers IPC buffers, job queues
* and log buffers, programmable to L2$ Uncached by VPU MTRR
*/
@@ -493,6 +557,11 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
+ boot_params->dvfs_mode = vdev->fw->dvfs_mode;
+ if (!IVPU_WA(disable_d0i3_msg))
+ boot_params->d0i3_delayed_entry = 1;
+ boot_params->d0i3_residency_time_us = 0;
+ boot_params->d0i3_entry_vpu_ts = 0;
wmb(); /* Flush WC buffers after writing bootparams */
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 10ae2847f0ef..66b60fa161b5 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -27,6 +27,7 @@ struct ivpu_fw_info {
u32 trace_level;
u32 trace_destination_mask;
u64 trace_hw_component_mask;
+ u32 dvfs_mode;
};
int ivpu_fw_init(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index c91852f2edc8..1dda4f38ea25 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -20,215 +20,18 @@
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
-MODULE_IMPORT_NS(DMA_BUF);
-
static const struct drm_gem_object_funcs ivpu_gem_funcs;
-static struct lock_class_key prime_bo_lock_class_key;
-
-static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo)
-{
- /* Pages are managed by the underlying dma-buf */
- return 0;
-}
-
-static void prime_free_pages_locked(struct ivpu_bo *bo)
-{
- /* Pages are managed by the underlying dma-buf */
-}
-
-static int prime_map_pages_locked(struct ivpu_bo *bo)
-{
- struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- struct sg_table *sgt;
-
- sgt = dma_buf_map_attachment_unlocked(bo->base.import_attach, DMA_BIDIRECTIONAL);
- if (IS_ERR(sgt)) {
- ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt));
- return PTR_ERR(sgt);
- }
-
- bo->sgt = sgt;
- return 0;
-}
-
-static void prime_unmap_pages_locked(struct ivpu_bo *bo)
-{
- dma_buf_unmap_attachment_unlocked(bo->base.import_attach, bo->sgt, DMA_BIDIRECTIONAL);
- bo->sgt = NULL;
-}
-
-static const struct ivpu_bo_ops prime_ops = {
- .type = IVPU_BO_TYPE_PRIME,
- .name = "prime",
- .alloc_pages = prime_alloc_pages_locked,
- .free_pages = prime_free_pages_locked,
- .map_pages = prime_map_pages_locked,
- .unmap_pages = prime_unmap_pages_locked,
-};
-
-static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
-{
- int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
- struct page **pages;
-
- pages = drm_gem_get_pages(&bo->base);
- if (IS_ERR(pages))
- return PTR_ERR(pages);
-
- if (bo->flags & DRM_IVPU_BO_WC)
- set_pages_array_wc(pages, npages);
- else if (bo->flags & DRM_IVPU_BO_UNCACHED)
- set_pages_array_uc(pages, npages);
-
- bo->pages = pages;
- return 0;
-}
-
-static void shmem_free_pages_locked(struct ivpu_bo *bo)
-{
- if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
- set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
- drm_gem_put_pages(&bo->base, bo->pages, true, false);
- bo->pages = NULL;
-}
-
-static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
-{
- int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
- struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- struct sg_table *sgt;
- int ret;
-
- sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages);
- if (IS_ERR(sgt)) {
- ivpu_err(vdev, "Failed to allocate sgtable\n");
- return PTR_ERR(sgt);
- }
-
- ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0);
- if (ret) {
- ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
- goto err_free_sgt;
- }
-
- bo->sgt = sgt;
- return 0;
-
-err_free_sgt:
- kfree(sgt);
- return ret;
-}
-
-static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo)
-{
- struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-
- dma_unmap_sgtable(vdev->drm.dev, bo->sgt, DMA_BIDIRECTIONAL, 0);
- sg_free_table(bo->sgt);
- kfree(bo->sgt);
- bo->sgt = NULL;
-}
-
-static const struct ivpu_bo_ops shmem_ops = {
- .type = IVPU_BO_TYPE_SHMEM,
- .name = "shmem",
- .alloc_pages = shmem_alloc_pages_locked,
- .free_pages = shmem_free_pages_locked,
- .map_pages = ivpu_bo_map_pages_locked,
- .unmap_pages = ivpu_bo_unmap_pages_locked,
-};
-
-static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
-{
- unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
- struct page **pages;
- int ret;
-
- pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
-
- for (i = 0; i < npages; i++) {
- pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
- if (!pages[i]) {
- ret = -ENOMEM;
- goto err_free_pages;
- }
- cond_resched();
- }
-
- bo->pages = pages;
- return 0;
-
-err_free_pages:
- while (i--)
- put_page(pages[i]);
- kvfree(pages);
- return ret;
-}
-
-static void internal_free_pages_locked(struct ivpu_bo *bo)
-{
- unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
-
- if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
- set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
- for (i = 0; i < npages; i++)
- put_page(bo->pages[i]);
-
- kvfree(bo->pages);
- bo->pages = NULL;
-}
-
-static const struct ivpu_bo_ops internal_ops = {
- .type = IVPU_BO_TYPE_INTERNAL,
- .name = "internal",
- .alloc_pages = internal_alloc_pages_locked,
- .free_pages = internal_free_pages_locked,
- .map_pages = ivpu_bo_map_pages_locked,
- .unmap_pages = ivpu_bo_unmap_pages_locked,
-};
-
-static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo)
-{
- struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- int ret;
-
- lockdep_assert_held(&bo->lock);
- drm_WARN_ON(&vdev->drm, bo->sgt);
-
- ret = bo->ops->alloc_pages(bo);
- if (ret) {
- ivpu_err(vdev, "Failed to allocate pages for BO: %d", ret);
- return ret;
- }
-
- ret = bo->ops->map_pages(bo);
- if (ret) {
- ivpu_err(vdev, "Failed to map pages for BO: %d", ret);
- goto err_free_pages;
- }
- return ret;
-
-err_free_pages:
- bo->ops->free_pages(bo);
- return ret;
-}
-
-static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo)
+static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action)
{
- mutex_lock(&bo->lock);
-
- WARN_ON(!bo->sgt);
- bo->ops->unmap_pages(bo);
- WARN_ON(bo->sgt);
- bo->ops->free_pages(bo);
- WARN_ON(bo->pages);
-
- mutex_unlock(&bo->lock);
+ if (bo->ctx)
+ ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u ctx %d vpu_addr 0x%llx mmu_mapped %d\n",
+ action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt,
+ bo->handle, bo->ctx->id, bo->vpu_addr, bo->mmu_mapped);
+ else
+ ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u (not added to context)\n",
+ action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt,
+ bo->handle);
}
/*
@@ -245,21 +48,24 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
mutex_lock(&bo->lock);
- if (!bo->vpu_addr) {
- ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n",
- bo->ctx->id, bo->handle);
+ ivpu_dbg_bo(vdev, bo, "pin");
+
+ if (!bo->ctx) {
+ ivpu_err(vdev, "vpu_addr not allocated for BO %d\n", bo->handle);
ret = -EINVAL;
goto unlock;
}
- if (!bo->sgt) {
- ret = ivpu_bo_alloc_and_map_pages_locked(bo);
- if (ret)
+ if (!bo->mmu_mapped) {
+ struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+
+ if (IS_ERR(sgt)) {
+ ret = PTR_ERR(sgt);
+ ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
goto unlock;
- }
+ }
- if (!bo->mmu_mapped) {
- ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt,
+ ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt,
ivpu_bo_is_snooped(bo));
if (ret) {
ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret);
@@ -281,248 +87,213 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
int ret;
- if (!range) {
- if (bo->flags & DRM_IVPU_BO_SHAVE_MEM)
- range = &vdev->hw->ranges.shave;
- else if (bo->flags & DRM_IVPU_BO_DMA_MEM)
- range = &vdev->hw->ranges.dma;
- else
- range = &vdev->hw->ranges.user;
- }
+ mutex_lock(&bo->lock);
- mutex_lock(&ctx->lock);
- ret = ivpu_mmu_context_insert_node_locked(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
+ ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
if (!ret) {
bo->ctx = ctx;
bo->vpu_addr = bo->mm_node.start;
- list_add_tail(&bo->ctx_node, &ctx->bo_list);
+ } else {
+ ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
}
- mutex_unlock(&ctx->lock);
+
+ ivpu_dbg_bo(vdev, bo, "alloc");
+
+ mutex_unlock(&bo->lock);
return ret;
}
-static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo)
+static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- struct ivpu_mmu_context *ctx = bo->ctx;
- ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
- ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
+ lockdep_assert_held(&bo->lock);
- mutex_lock(&bo->lock);
+ ivpu_dbg_bo(vdev, bo, "unbind");
+
+ /* TODO: dma_unmap */
if (bo->mmu_mapped) {
- drm_WARN_ON(&vdev->drm, !bo->sgt);
- ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt);
+ drm_WARN_ON(&vdev->drm, !bo->ctx);
+ drm_WARN_ON(&vdev->drm, !bo->vpu_addr);
+ drm_WARN_ON(&vdev->drm, !bo->base.sgt);
+ ivpu_mmu_context_unmap_sgt(vdev, bo->ctx, bo->vpu_addr, bo->base.sgt);
bo->mmu_mapped = false;
}
- mutex_lock(&ctx->lock);
- list_del(&bo->ctx_node);
- bo->vpu_addr = 0;
- bo->ctx = NULL;
- ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node);
- mutex_unlock(&ctx->lock);
+ if (bo->ctx) {
+ ivpu_mmu_context_remove_node(bo->ctx, &bo->mm_node);
+ bo->vpu_addr = 0;
+ bo->ctx = NULL;
+ }
+}
+static void ivpu_bo_unbind(struct ivpu_bo *bo)
+{
+ mutex_lock(&bo->lock);
+ ivpu_bo_unbind_locked(bo);
mutex_unlock(&bo->lock);
}
-void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx)
+void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
- struct ivpu_bo *bo, *tmp;
+ struct ivpu_bo *bo;
+
+ if (drm_WARN_ON(&vdev->drm, !ctx))
+ return;
- list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node)
- ivpu_bo_free_vpu_addr(bo);
+ mutex_lock(&vdev->bo_list_lock);
+ list_for_each_entry(bo, &vdev->bo_list, bo_list_node) {
+ mutex_lock(&bo->lock);
+ if (bo->ctx == ctx)
+ ivpu_bo_unbind_locked(bo);
+ mutex_unlock(&bo->lock);
+ }
+ mutex_unlock(&vdev->bo_list_lock);
}
-static struct ivpu_bo *
-ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context,
- u64 size, u32 flags, const struct ivpu_bo_ops *ops,
- const struct ivpu_addr_range *range, u64 user_ptr)
+struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size)
{
struct ivpu_bo *bo;
- int ret = 0;
-
- if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size)))
- return ERR_PTR(-EINVAL);
- switch (flags & DRM_IVPU_BO_CACHE_MASK) {
- case DRM_IVPU_BO_CACHED:
- case DRM_IVPU_BO_UNCACHED:
- case DRM_IVPU_BO_WC:
- break;
- default:
+ if (size == 0 || !PAGE_ALIGNED(size))
return ERR_PTR(-EINVAL);
- }
bo = kzalloc(sizeof(*bo), GFP_KERNEL);
if (!bo)
return ERR_PTR(-ENOMEM);
- mutex_init(&bo->lock);
- bo->base.funcs = &ivpu_gem_funcs;
- bo->flags = flags;
- bo->ops = ops;
- bo->user_ptr = user_ptr;
-
- if (ops->type == IVPU_BO_TYPE_SHMEM)
- ret = drm_gem_object_init(&vdev->drm, &bo->base, size);
- else
- drm_gem_private_object_init(&vdev->drm, &bo->base, size);
-
- if (ret) {
- ivpu_err(vdev, "Failed to initialize drm object\n");
- goto err_free;
- }
-
- if (flags & DRM_IVPU_BO_MAPPABLE) {
- ret = drm_gem_create_mmap_offset(&bo->base);
- if (ret) {
- ivpu_err(vdev, "Failed to allocate mmap offset\n");
- goto err_release;
- }
- }
-
- if (mmu_context) {
- ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range);
- if (ret) {
- ivpu_err(vdev, "Failed to add BO to context: %d\n", ret);
- goto err_release;
- }
- }
+ bo->base.base.funcs = &ivpu_gem_funcs;
+ bo->base.pages_mark_dirty_on_put = true; /* VPU can dirty a BO anytime */
- return bo;
+ INIT_LIST_HEAD(&bo->bo_list_node);
+ mutex_init(&bo->lock);
-err_release:
- drm_gem_object_release(&bo->base);
-err_free:
- kfree(bo);
- return ERR_PTR(ret);
+ return &bo->base.base;
}
-static void ivpu_bo_free(struct drm_gem_object *obj)
+static struct ivpu_bo *
+ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags)
{
- struct ivpu_bo *bo = to_ivpu_bo(obj);
- struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-
- if (bo->ctx)
- ivpu_dbg(vdev, BO, "free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
- bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
- else
- ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n",
- (bool)bo->sgt, bo->mmu_mapped);
-
- drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+ struct drm_gem_shmem_object *shmem;
+ struct ivpu_bo *bo;
- vunmap(bo->kvaddr);
+ switch (flags & DRM_IVPU_BO_CACHE_MASK) {
+ case DRM_IVPU_BO_CACHED:
+ case DRM_IVPU_BO_WC:
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
- if (bo->ctx)
- ivpu_bo_free_vpu_addr(bo);
+ shmem = drm_gem_shmem_create(&vdev->drm, size);
+ if (IS_ERR(shmem))
+ return ERR_CAST(shmem);
- if (bo->sgt)
- ivpu_bo_unmap_and_free_pages(bo);
+ bo = to_ivpu_bo(&shmem->base);
+ bo->base.map_wc = flags & DRM_IVPU_BO_WC;
+ bo->flags = flags;
- if (bo->base.import_attach)
- drm_prime_gem_destroy(&bo->base, bo->sgt);
+ mutex_lock(&vdev->bo_list_lock);
+ list_add_tail(&bo->bo_list_node, &vdev->bo_list);
+ mutex_unlock(&vdev->bo_list_lock);
- drm_gem_object_release(&bo->base);
+ ivpu_dbg(vdev, BO, "create: vpu_addr 0x%llx size %zu flags 0x%x\n",
+ bo->vpu_addr, bo->base.base.size, flags);
- mutex_destroy(&bo->lock);
- kfree(bo);
+ return bo;
}
-static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file)
{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_bo *bo = to_ivpu_bo(obj);
- struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-
- ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s",
- bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), bo->ops->name);
+ struct ivpu_addr_range *range;
- if (obj->import_attach) {
- /* Drop the reference drm_gem_mmap_obj() acquired.*/
- drm_gem_object_put(obj);
- vma->vm_private_data = NULL;
- return dma_buf_mmap(obj->dma_buf, vma, 0);
- }
-
- vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND);
- vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags));
+ if (bo->flags & DRM_IVPU_BO_SHAVE_MEM)
+ range = &vdev->hw->ranges.shave;
+ else if (bo->flags & DRM_IVPU_BO_DMA_MEM)
+ range = &vdev->hw->ranges.dma;
+ else
+ range = &vdev->hw->ranges.user;
- return 0;
+ return ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, range);
}
-static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj)
+static void ivpu_bo_free(struct drm_gem_object *obj)
{
+ struct ivpu_device *vdev = to_ivpu_device(obj->dev);
struct ivpu_bo *bo = to_ivpu_bo(obj);
- loff_t npages = obj->size >> PAGE_SHIFT;
- int ret = 0;
- mutex_lock(&bo->lock);
+ mutex_lock(&vdev->bo_list_lock);
+ list_del(&bo->bo_list_node);
+ mutex_unlock(&vdev->bo_list_lock);
- if (!bo->sgt)
- ret = ivpu_bo_alloc_and_map_pages_locked(bo);
+ drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
- mutex_unlock(&bo->lock);
+ ivpu_dbg_bo(vdev, bo, "free");
- if (ret)
- return ERR_PTR(ret);
+ ivpu_bo_unbind(bo);
+ mutex_destroy(&bo->lock);
- return drm_prime_pages_to_sg(obj->dev, bo->pages, npages);
+ drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
+ drm_gem_shmem_free(&bo->base);
}
-static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf)
-{
- struct vm_area_struct *vma = vmf->vma;
- struct drm_gem_object *obj = vma->vm_private_data;
- struct ivpu_bo *bo = to_ivpu_bo(obj);
- loff_t npages = obj->size >> PAGE_SHIFT;
- pgoff_t page_offset;
- struct page *page;
- vm_fault_t ret;
- int err;
-
- mutex_lock(&bo->lock);
-
- if (!bo->sgt) {
- err = ivpu_bo_alloc_and_map_pages_locked(bo);
- if (err) {
- ret = vmf_error(err);
- goto unlock;
- }
- }
-
- /* We don't use vmf->pgoff since that has the fake offset */
- page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
- if (page_offset >= npages) {
- ret = VM_FAULT_SIGBUS;
- } else {
- page = bo->pages[page_offset];
- ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
- }
-
-unlock:
- mutex_unlock(&bo->lock);
+/*
+ * dma-buf callbacks used for BOs exported by ivpu_bo_export(). Every hook is
+ * one of the generic drm_gem_* PRIME helpers; cache_sgt_mapping is enabled.
+ */
+static const struct dma_buf_ops ivpu_bo_dmabuf_ops = {
+	.cache_sgt_mapping = true,
+	/* Attach / detach / release */
+	.attach = drm_gem_map_attach,
+	.detach = drm_gem_map_detach,
+	.release = drm_gem_dmabuf_release,
+	/* Device (DMA) mappings */
+	.map_dma_buf = drm_gem_map_dma_buf,
+	.unmap_dma_buf = drm_gem_unmap_dma_buf,
+	/* CPU mappings */
+	.mmap = drm_gem_dmabuf_mmap,
+	.vmap = drm_gem_dmabuf_vmap,
+	.vunmap = drm_gem_dmabuf_vunmap,
+};
- return ret;
+/*
+ * ivpu_bo_export() - export a GEM object as a dma-buf
+ * @obj: GEM object to export
+ * @flags: flags stored in the dma_buf_export_info handed to the exporter
+ *
+ * Calls drm_gem_shmem_get_pages_sgt() first so the backing pages exist and are
+ * DMA-mapped before drm_gem_dmabuf_export() is invoked.
+ *
+ * Return: the result of drm_gem_dmabuf_export(), or an ERR_PTR carrying the
+ * error reported by drm_gem_shmem_get_pages_sgt().
+ */
+static struct dma_buf *ivpu_bo_export(struct drm_gem_object *obj, int flags)
+{
+	struct drm_device *dev = obj->dev;
+	struct dma_buf_export_info exp_info = {
+		.exp_name = KBUILD_MODNAME,
+		.owner = dev->driver->fops->owner,
+		.ops = &ivpu_bo_dmabuf_ops,
+		.size = obj->size,
+		.flags = flags,
+		.priv = obj,
+		.resv = obj->resv,
+	};
+	struct sg_table *sgt;
+
+	/*
+	 * Make sure that pages are allocated and dma-mapped before exporting the bo.
+	 * DMA-mapping is required if the bo will be imported to the same device.
+	 */
+	sgt = drm_gem_shmem_get_pages_sgt(to_drm_gem_shmem_obj(obj));
+	if (IS_ERR(sgt))
+		/* Keep the pointer typed; ERR_CAST() makes the error hand-over explicit. */
+		return ERR_CAST(sgt);
+
+	return drm_gem_dmabuf_export(dev, &exp_info);
}
-static const struct vm_operations_struct ivpu_vm_ops = {
- .fault = ivpu_vm_fault,
- .open = drm_gem_vm_open,
- .close = drm_gem_vm_close,
-};
-
static const struct drm_gem_object_funcs ivpu_gem_funcs = {
.free = ivpu_bo_free,
- .mmap = ivpu_bo_mmap,
- .vm_ops = &ivpu_vm_ops,
- .get_sg_table = ivpu_bo_get_sg_table,
+ .open = ivpu_bo_open,
+ .export = ivpu_bo_export,
+ .print_info = drm_gem_shmem_object_print_info,
+ .pin = drm_gem_shmem_object_pin,
+ .unpin = drm_gem_shmem_object_unpin,
+ .get_sg_table = drm_gem_shmem_object_get_sg_table,
+ .vmap = drm_gem_shmem_object_vmap,
+ .vunmap = drm_gem_shmem_object_vunmap,
+ .mmap = drm_gem_shmem_object_mmap,
+ .vm_ops = &drm_gem_shmem_vm_ops,
};
-int
-ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
@@ -537,23 +308,20 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (size == 0)
return -EINVAL;
- bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0);
+ bo = ivpu_bo_create(vdev, size, args->flags);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)",
bo, file_priv->ctx.id, args->size, args->flags);
return PTR_ERR(bo);
}
- ret = drm_gem_handle_create(file, &bo->base, &bo->handle);
+ ret = drm_gem_handle_create(file, &bo->base.base, &bo->handle);
if (!ret) {
args->vpu_addr = bo->vpu_addr;
args->handle = bo->handle;
}
- drm_gem_object_put(&bo->base);
-
- ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
- file_priv->ctx.id, bo->vpu_addr, ivpu_bo_size(bo), bo->flags);
+ drm_gem_object_put(&bo->base.base);
return ret;
}
@@ -563,8 +331,8 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
{
const struct ivpu_addr_range *range;
struct ivpu_addr_range fixed_range;
+ struct iosys_map map;
struct ivpu_bo *bo;
- pgprot_t prot;
int ret;
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr));
@@ -578,81 +346,42 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
range = &vdev->hw->ranges.global;
}
- bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0);
+ bo = ivpu_bo_create(vdev, size, flags);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
bo, vpu_addr, size, flags);
return NULL;
}
- ret = ivpu_bo_pin(bo);
+ ret = ivpu_bo_alloc_vpu_addr(bo, &vdev->gctx, range);
if (ret)
goto err_put;
- if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
- drm_clflush_pages(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
- if (bo->flags & DRM_IVPU_BO_WC)
- set_pages_array_wc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
- else if (bo->flags & DRM_IVPU_BO_UNCACHED)
- set_pages_array_uc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
- prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
- bo->kvaddr = vmap(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT, VM_MAP, prot);
- if (!bo->kvaddr) {
- ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
+ ret = ivpu_bo_pin(bo);
+ if (ret)
goto err_put;
- }
- ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
- bo->vpu_addr, ivpu_bo_size(bo), flags);
+ ret = drm_gem_shmem_vmap(&bo->base, &map);
+ if (ret)
+ goto err_put;
return bo;
err_put:
- drm_gem_object_put(&bo->base);
+ drm_gem_object_put(&bo->base.base);
return NULL;
}
void ivpu_bo_free_internal(struct ivpu_bo *bo)
{
- drm_gem_object_put(&bo->base);
-}
-
-struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf)
-{
- struct ivpu_device *vdev = to_ivpu_device(dev);
- struct dma_buf_attachment *attach;
- struct ivpu_bo *bo;
-
- attach = dma_buf_attach(buf, dev->dev);
- if (IS_ERR(attach))
- return ERR_CAST(attach);
-
- get_dma_buf(buf);
+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr);
- bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0);
- if (IS_ERR(bo)) {
- ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size);
- goto err_detach;
- }
-
- lockdep_set_class(&bo->lock, &prime_bo_lock_class_key);
-
- bo->base.import_attach = attach;
-
- return &bo->base;
-
-err_detach:
- dma_buf_detach(buf, attach);
- dma_buf_put(buf);
- return ERR_CAST(bo);
+ drm_gem_shmem_vunmap(&bo->base, &map);
+ drm_gem_object_put(&bo->base.base);
}
int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
- struct ivpu_file_priv *file_priv = file->driver_priv;
- struct ivpu_device *vdev = to_ivpu_device(dev);
struct drm_ivpu_bo_info *args = data;
struct drm_gem_object *obj;
struct ivpu_bo *bo;
@@ -665,21 +394,12 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file
bo = to_ivpu_bo(obj);
mutex_lock(&bo->lock);
-
- if (!bo->ctx) {
- ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL);
- if (ret) {
- ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret);
- goto unlock;
- }
- }
-
args->flags = bo->flags;
args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
args->vpu_addr = bo->vpu_addr;
args->size = obj->size;
-unlock:
mutex_unlock(&bo->lock);
+
drm_gem_object_put(obj);
return ret;
}
@@ -714,41 +434,41 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
{
unsigned long dma_refcount = 0;
- if (bo->base.dma_buf && bo->base.dma_buf->file)
- dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
+ mutex_lock(&bo->lock);
+
+ if (bo->base.base.dma_buf && bo->base.base.dma_buf->file)
+ dma_refcount = atomic_long_read(&bo->base.base.dma_buf->file->f_count);
+
+ drm_printf(p, "%-3u %-6d 0x%-12llx %-10lu 0x%-8x %-4u %-8lu",
+ bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.base.size,
+ bo->flags, kref_read(&bo->base.base.refcount), dma_refcount);
+
+ if (bo->base.base.import_attach)
+ drm_printf(p, " imported");
+
+ if (bo->base.pages)
+ drm_printf(p, " has_pages");
- drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
- bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo),
- kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
+ if (bo->mmu_mapped)
+ drm_printf(p, " mmu_mapped");
+
+ drm_printf(p, "\n");
+
+ mutex_unlock(&bo->lock);
}
void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
{
struct ivpu_device *vdev = to_ivpu_device(dev);
- struct ivpu_file_priv *file_priv;
- unsigned long ctx_id;
struct ivpu_bo *bo;
- drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n",
- "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type");
+ drm_printf(p, "%-3s %-6s %-14s %-10s %-10s %-4s %-8s %s\n",
+ "ctx", "handle", "vpu_addr", "size", "flags", "refs", "dma_refs", "attribs");
- mutex_lock(&vdev->gctx.lock);
- list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node)
+ mutex_lock(&vdev->bo_list_lock);
+ list_for_each_entry(bo, &vdev->bo_list, bo_list_node)
ivpu_bo_print_info(bo, p);
- mutex_unlock(&vdev->gctx.lock);
-
- xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
- file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
- if (!file_priv)
- continue;
-
- mutex_lock(&file_priv->ctx.lock);
- list_for_each_entry(bo, &file_priv->ctx.bo_list, ctx_node)
- ivpu_bo_print_info(bo, p);
- mutex_unlock(&file_priv->ctx.lock);
-
- ivpu_file_priv_put(&file_priv);
- }
+ mutex_unlock(&vdev->bo_list_lock);
}
void ivpu_bo_list_print(struct drm_device *dev)
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index a0b4d4a32b3b..d75cad0d3c74 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -6,84 +6,52 @@
#define __IVPU_GEM_H__
#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_mm.h>
-struct dma_buf;
-struct ivpu_bo_ops;
struct ivpu_file_priv;
struct ivpu_bo {
- struct drm_gem_object base;
- const struct ivpu_bo_ops *ops;
-
+ struct drm_gem_shmem_object base;
struct ivpu_mmu_context *ctx;
- struct list_head ctx_node;
+ struct list_head bo_list_node;
struct drm_mm_node mm_node;
- struct mutex lock; /* Protects: pages, sgt, mmu_mapped */
- struct sg_table *sgt;
- struct page **pages;
- bool mmu_mapped;
-
- void *kvaddr;
+ struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */
u64 vpu_addr;
u32 handle;
u32 flags;
- uintptr_t user_ptr;
- u32 job_status;
-};
-
-enum ivpu_bo_type {
- IVPU_BO_TYPE_SHMEM = 1,
- IVPU_BO_TYPE_INTERNAL,
- IVPU_BO_TYPE_PRIME,
-};
-
-struct ivpu_bo_ops {
- enum ivpu_bo_type type;
- const char *name;
- int (*alloc_pages)(struct ivpu_bo *bo);
- void (*free_pages)(struct ivpu_bo *bo);
- int (*map_pages)(struct ivpu_bo *bo);
- void (*unmap_pages)(struct ivpu_bo *bo);
+ u32 job_status; /* Valid only for command buffer */
+ bool mmu_mapped;
};
int ivpu_bo_pin(struct ivpu_bo *bo);
-void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx);
-void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
-void ivpu_bo_list_print(struct drm_device *dev);
+void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
-struct ivpu_bo *
-ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
+struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size);
+struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
void ivpu_bo_free_internal(struct ivpu_bo *bo);
-struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
-void ivpu_bo_unmap_sgt_and_remove_from_context(struct ivpu_bo *bo);
int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
+void ivpu_bo_list_print(struct drm_device *dev);
+
static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj)
{
- return container_of(obj, struct ivpu_bo, base);
+ return container_of(obj, struct ivpu_bo, base.base);
}
static inline void *ivpu_bo_vaddr(struct ivpu_bo *bo)
{
- return bo->kvaddr;
+ return bo->base.vaddr;
}
static inline size_t ivpu_bo_size(struct ivpu_bo *bo)
{
- return bo->base.size;
-}
-
-static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset)
-{
- if (offset > ivpu_bo_size(bo) || !bo->pages)
- return NULL;
-
- return bo->pages[offset / PAGE_SIZE];
+ return bo->base.base.size;
}
static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo)
@@ -96,20 +64,9 @@ static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
}
-static inline pgprot_t ivpu_bo_pgprot(struct ivpu_bo *bo, pgprot_t prot)
-{
- if (bo->flags & DRM_IVPU_BO_WC)
- return pgprot_writecombine(prot);
-
- if (bo->flags & DRM_IVPU_BO_UNCACHED)
- return pgprot_noncached(prot);
-
- return prot;
-}
-
static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo)
{
- return to_ivpu_device(bo->base.dev);
+ return to_ivpu_device(bo->base.base.dev);
}
static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr)
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index 1079e06255ba..b2909168a0a6 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -15,8 +15,11 @@ struct ivpu_hw_ops {
int (*power_down)(struct ivpu_device *vdev);
int (*reset)(struct ivpu_device *vdev);
bool (*is_idle)(struct ivpu_device *vdev);
+ int (*wait_for_idle)(struct ivpu_device *vdev);
void (*wdt_disable)(struct ivpu_device *vdev);
void (*diagnose_failure)(struct ivpu_device *vdev);
+ u32 (*profiling_freq_get)(struct ivpu_device *vdev);
+ void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable);
u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
@@ -58,6 +61,8 @@ struct ivpu_hw_info {
u32 sku;
u16 config;
int dma_bits;
+ ktime_t d0i3_entry_host_ts;
+ u64 d0i3_entry_vpu_ts;
};
extern const struct ivpu_hw_ops ivpu_hw_37xx_ops;
@@ -85,6 +90,11 @@ static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev)
return vdev->hw->ops->is_idle(vdev);
};
+/* Forward to the wait_for_idle() op of the active HW backend and pass its result through. */
+static inline int ivpu_hw_wait_for_idle(struct ivpu_device *vdev)
+{
+	const struct ivpu_hw_ops *hw_ops = vdev->hw->ops;
+
+	return hw_ops->wait_for_idle(vdev);
+};
+
static inline int ivpu_hw_power_down(struct ivpu_device *vdev)
{
ivpu_dbg(vdev, PM, "HW power down\n");
@@ -104,6 +114,16 @@ static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev)
vdev->hw->ops->wdt_disable(vdev);
};
+/* Forward to the profiling_freq_get() op of the active HW backend and return its value. */
+static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
+{
+	const struct ivpu_hw_ops *hw_ops = vdev->hw->ops;
+
+	return hw_ops->profiling_freq_get(vdev);
+};
+
+/*
+ * Forward to the profiling_freq_drive() op of the active HW backend.
+ * @enable is passed to the op unchanged. The op returns void, so it is called
+ * as a plain statement rather than through "return <expr>;".
+ */
+static inline void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+	vdev->hw->ops->profiling_freq_drive(vdev, enable);
+};
+
/* Register indirect accesses */
static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
{
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index d530384f8d60..574cdeefb66b 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -29,6 +29,7 @@
#define PLL_REF_CLK_FREQ (50 * 1000000)
#define PLL_SIMULATION_FREQ (10 * 1000000)
+#define PLL_PROF_CLK_FREQ (38400 * 1000)
#define PLL_DEFAULT_EPP_VALUE 0x80
#define TIM_SAFE_ENABLE 0xf1d0dead
@@ -37,7 +38,7 @@
#define TIMEOUT_US (150 * USEC_PER_MSEC)
#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
-#define IDLE_TIMEOUT_US (500 * USEC_PER_MSEC)
+#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
#define ICB_0_IRQ_MASK ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \
(REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \
@@ -96,6 +97,7 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
vdev->timeout.tdr = 2000;
vdev->timeout.reschedule_suspend = 10;
vdev->timeout.autosuspend = 10;
+ vdev->timeout.d0i3_entry_msg = 5;
}
static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev)
@@ -722,10 +724,23 @@ static bool ivpu_hw_37xx_is_idle(struct ivpu_device *vdev)
REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, val);
}
+/*
+ * Poll the IDLE field of VPU_37XX_BUTTRESS_VPU_STATUS for the value 0x1, using
+ * IDLE_TIMEOUT_US as the timeout argument. Returns whatever REGB_POLL_FLD() yields.
+ */
+static int ivpu_hw_37xx_wait_for_idle(struct ivpu_device *vdev)
+{
+	int ret;
+
+	ret = REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
+
+	return ret;
+}
+
+/*
+ * Store a pair of timestamps in vdev->hw: the host boot-time clock, then the
+ * 64-bit value read from VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT.
+ */
+static void ivpu_hw_37xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
+{
+	struct ivpu_hw_info *hw = vdev->hw;
+
+	hw->d0i3_entry_host_ts = ktime_get_boottime();
+	hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT);
+}
+
static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev)
{
int ret = 0;
+ ivpu_hw_37xx_save_d0i3_entry_timestamp(vdev);
+
if (!ivpu_hw_37xx_is_idle(vdev))
ivpu_warn(vdev, "VPU not idle during power down\n");
@@ -760,6 +775,16 @@ static void ivpu_hw_37xx_wdt_disable(struct ivpu_device *vdev)
REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val);
}
+/* The 37XX backend always reports the fixed PLL_PROF_CLK_FREQ value. */
+static u32 ivpu_hw_37xx_profiling_freq_get(struct ivpu_device *vdev)
+{
+	const u32 freq = PLL_PROF_CLK_FREQ;
+
+	return freq;
+}
+
+/* Intentionally empty implementation of the profiling_freq_drive() op; @enable is ignored. */
+static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+ /* Profiling frequency is a debug feature that is unavailable on VPU 37XX. */
+}
+
static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
{
u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
@@ -871,17 +896,20 @@ static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
}
/* Handler for IRQs from VPU core (irqV) */
-static u32 ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread)
{
u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
+ if (!status)
+ return false;
+
REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status);
if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status))
ivpu_mmu_irq_evtq_handler(vdev);
if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
- ivpu_ipc_irq_handler(vdev);
+ ivpu_ipc_irq_handler(vdev, wake_thread);
if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
@@ -898,17 +926,17 @@ static u32 ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq)
if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
ivpu_hw_37xx_irq_noc_firewall_handler(vdev);
- return status;
+ return true;
}
/* Handler for IRQs from Buttress core (irqB) */
-static u32 ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
{
u32 status = REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
bool schedule_recovery = false;
- if (status == 0)
- return 0;
+ if (!status)
+ return false;
if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
@@ -944,23 +972,27 @@ static u32 ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
if (schedule_recovery)
ivpu_pm_schedule_recovery(vdev);
- return status;
+ return true;
}
static irqreturn_t ivpu_hw_37xx_irq_handler(int irq, void *ptr)
{
struct ivpu_device *vdev = ptr;
- u32 ret_irqv, ret_irqb;
+ bool irqv_handled, irqb_handled, wake_thread = false;
REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
- ret_irqv = ivpu_hw_37xx_irqv_handler(vdev, irq);
- ret_irqb = ivpu_hw_37xx_irqb_handler(vdev, irq);
+ irqv_handled = ivpu_hw_37xx_irqv_handler(vdev, irq, &wake_thread);
+ irqb_handled = ivpu_hw_37xx_irqb_handler(vdev, irq);
/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
- return IRQ_RETVAL(ret_irqb | ret_irqv);
+ if (wake_thread)
+ return IRQ_WAKE_THREAD;
+ if (irqv_handled || irqb_handled)
+ return IRQ_HANDLED;
+ return IRQ_NONE;
}
static void ivpu_hw_37xx_diagnose_failure(struct ivpu_device *vdev)
@@ -997,11 +1029,14 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = {
.info_init = ivpu_hw_37xx_info_init,
.power_up = ivpu_hw_37xx_power_up,
.is_idle = ivpu_hw_37xx_is_idle,
+ .wait_for_idle = ivpu_hw_37xx_wait_for_idle,
.power_down = ivpu_hw_37xx_power_down,
.reset = ivpu_hw_37xx_reset,
.boot_fw = ivpu_hw_37xx_boot_fw,
.wdt_disable = ivpu_hw_37xx_wdt_disable,
.diagnose_failure = ivpu_hw_37xx_diagnose_failure,
+ .profiling_freq_get = ivpu_hw_37xx_profiling_freq_get,
+ .profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get,
.reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get,
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
index 4083beb5e9db..f6fec1919202 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
@@ -240,6 +240,8 @@
#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG 0x06021008u
#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK BIT_MASK(9)
+#define VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT 0x06029000u
+
#define VPU_37XX_CPU_SS_DOORBELL_0 0x06300000u
#define VPU_37XX_CPU_SS_DOORBELL_0_SET_MASK BIT_MASK(0)
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index e691c49c9841..eba2fdef2ace 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -39,6 +39,7 @@
#define TIMEOUT_US (150 * USEC_PER_MSEC)
#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
+#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
#define WEIGHTS_DEFAULT 0xf711f711u
#define WEIGHTS_ATS_DEFAULT 0x0000f711u
@@ -139,18 +140,21 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
vdev->timeout.tdr = 2000000;
vdev->timeout.reschedule_suspend = 1000;
vdev->timeout.autosuspend = -1;
+ vdev->timeout.d0i3_entry_msg = 500;
} else if (ivpu_is_simics(vdev)) {
vdev->timeout.boot = 50;
vdev->timeout.jsm = 500;
vdev->timeout.tdr = 10000;
vdev->timeout.reschedule_suspend = 10;
vdev->timeout.autosuspend = -1;
+ vdev->timeout.d0i3_entry_msg = 100;
} else {
vdev->timeout.boot = 1000;
vdev->timeout.jsm = 500;
vdev->timeout.tdr = 2000;
vdev->timeout.reschedule_suspend = 10;
vdev->timeout.autosuspend = 10;
+ vdev->timeout.d0i3_entry_msg = 5;
}
}
@@ -824,12 +828,6 @@ static int ivpu_hw_40xx_power_up(struct ivpu_device *vdev)
{
int ret;
- ret = ivpu_hw_40xx_reset(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to reset HW: %d\n", ret);
- return ret;
- }
-
ret = ivpu_hw_40xx_d0i3_disable(vdev);
if (ret)
ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
@@ -898,10 +896,23 @@ static bool ivpu_hw_40xx_is_idle(struct ivpu_device *vdev)
REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, val);
}
+/*
+ * Poll the IDLE field of VPU_40XX_BUTTRESS_VPU_STATUS for the value 0x1, using
+ * IDLE_TIMEOUT_US as the timeout argument. Returns whatever REGB_POLL_FLD() yields.
+ */
+static int ivpu_hw_40xx_wait_for_idle(struct ivpu_device *vdev)
+{
+	int ret;
+
+	ret = REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
+
+	return ret;
+}
+
+/*
+ * Store a pair of timestamps in vdev->hw: the host boot-time clock, then the
+ * 64-bit value read from VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT.
+ */
+static void ivpu_hw_40xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
+{
+	struct ivpu_hw_info *hw = vdev->hw;
+
+	hw->d0i3_entry_host_ts = ktime_get_boottime();
+	hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT);
+}
+
static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev)
{
int ret = 0;
+ ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev);
+
if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_reset(vdev))
ivpu_warn(vdev, "Failed to reset the VPU\n");
@@ -933,6 +944,19 @@ static void ivpu_hw_40xx_wdt_disable(struct ivpu_device *vdev)
REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val);
}
+/* Report the profiling frequency currently stored in vdev->hw->pll. */
+static u32 ivpu_hw_40xx_profiling_freq_get(struct ivpu_device *vdev)
+{
+	u32 freq = vdev->hw->pll.profiling_freq;
+
+	return freq;
+}
+
+/*
+ * Select the profiling frequency stored in vdev->hw->pll:
+ * PLL_PROFILING_FREQ_HIGH when @enable is set, PLL_PROFILING_FREQ_DEFAULT otherwise.
+ */
+static void ivpu_hw_40xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+	u32 freq = enable ? PLL_PROFILING_FREQ_HIGH : PLL_PROFILING_FREQ_DEFAULT;
+
+	vdev->hw->pll.profiling_freq = freq;
+}
+
/* Register indirect accesses */
static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev)
{
@@ -1023,13 +1047,12 @@ static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
}
/* Handler for IRQs from VPU core (irqV) */
-static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread)
{
u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
- irqreturn_t ret = IRQ_NONE;
if (!status)
- return IRQ_NONE;
+ return false;
REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status);
@@ -1037,7 +1060,7 @@ static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
ivpu_mmu_irq_evtq_handler(vdev);
if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
- ret |= ivpu_ipc_irq_handler(vdev);
+ ivpu_ipc_irq_handler(vdev, wake_thread);
if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
@@ -1054,17 +1077,17 @@ static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
ivpu_hw_40xx_irq_noc_firewall_handler(vdev);
- return ret;
+ return true;
}
/* Handler for IRQs from Buttress core (irqB) */
-static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
{
bool schedule_recovery = false;
u32 status = REGB_RD32(VPU_40XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
- if (status == 0)
- return IRQ_NONE;
+ if (!status)
+ return false;
if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE");
@@ -1116,26 +1139,27 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
if (schedule_recovery)
ivpu_pm_schedule_recovery(vdev);
- return IRQ_HANDLED;
+ return true;
}
static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr)
{
+ bool irqv_handled, irqb_handled, wake_thread = false;
struct ivpu_device *vdev = ptr;
- irqreturn_t ret = IRQ_NONE;
REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
- ret |= ivpu_hw_40xx_irqv_handler(vdev, irq);
- ret |= ivpu_hw_40xx_irqb_handler(vdev, irq);
+ irqv_handled = ivpu_hw_40xx_irqv_handler(vdev, irq, &wake_thread);
+ irqb_handled = ivpu_hw_40xx_irqb_handler(vdev, irq);
/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
- if (ret & IRQ_WAKE_THREAD)
+ if (wake_thread)
return IRQ_WAKE_THREAD;
-
- return ret;
+ if (irqv_handled || irqb_handled)
+ return IRQ_HANDLED;
+ return IRQ_NONE;
}
static void ivpu_hw_40xx_diagnose_failure(struct ivpu_device *vdev)
@@ -1185,11 +1209,14 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = {
.info_init = ivpu_hw_40xx_info_init,
.power_up = ivpu_hw_40xx_power_up,
.is_idle = ivpu_hw_40xx_is_idle,
+ .wait_for_idle = ivpu_hw_40xx_wait_for_idle,
.power_down = ivpu_hw_40xx_power_down,
.reset = ivpu_hw_40xx_reset,
.boot_fw = ivpu_hw_40xx_boot_fw,
.wdt_disable = ivpu_hw_40xx_wdt_disable,
.diagnose_failure = ivpu_hw_40xx_diagnose_failure,
+ .profiling_freq_get = ivpu_hw_40xx_profiling_freq_get,
+ .profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get,
.reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get,
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index a4ca40b184d4..e86621f16f85 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -5,7 +5,7 @@
#include <linux/genalloc.h>
#include <linux/highmem.h>
-#include <linux/kthread.h>
+#include <linux/pm_runtime.h>
#include <linux/wait.h>
#include "ivpu_drv.h"
@@ -17,19 +17,12 @@
#include "ivpu_pm.h"
#define IPC_MAX_RX_MSG 128
-#define IS_KTHREAD() (get_current()->flags & PF_KTHREAD)
struct ivpu_ipc_tx_buf {
struct ivpu_ipc_hdr ipc;
struct vpu_jsm_msg jsm;
};
-struct ivpu_ipc_rx_msg {
- struct list_head link;
- struct ivpu_ipc_hdr *ipc_hdr;
- struct vpu_jsm_msg *jsm_msg;
-};
-
static void ivpu_ipc_msg_dump(struct ivpu_device *vdev, char *c,
struct ivpu_ipc_hdr *ipc_hdr, u32 vpu_addr)
{
@@ -139,8 +132,49 @@ static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr)
ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr);
}
-void
-ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel)
+/*
+ * Wrap a received IPC header/JSM payload pair in a newly allocated
+ * ivpu_ipc_rx_msg and queue it for @cons.
+ *
+ * Messages for consumers that registered an rx_callback go onto the device-wide
+ * ipc->cb_msg_list; all others go onto the consumer's own rx_msg_list and the
+ * consumer's wait queue is woken. The caller must hold ipc->cons_lock with
+ * interrupts disabled (both are asserted via lockdep).
+ */
+static void
+ivpu_ipc_rx_msg_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+		    struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
+{
+	struct ivpu_ipc_info *ipc = vdev->ipc;
+	struct ivpu_ipc_rx_msg *msg;
+
+	lockdep_assert_held(&ipc->cons_lock);
+	lockdep_assert_irqs_disabled();
+
+	msg = kzalloc(sizeof(*msg), GFP_ATOMIC);
+	if (!msg) {
+		/* Nothing to track the message with: release the RX buffers right away. */
+		ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
+		return;
+	}
+
+	atomic_inc(&ipc->rx_msg_count);
+
+	msg->ipc_hdr = ipc_hdr;
+	msg->jsm_msg = jsm_msg;
+	msg->callback = cons->rx_callback;
+
+	if (msg->callback) {
+		list_add_tail(&msg->link, &ipc->cb_msg_list);
+		return;
+	}
+
+	spin_lock(&cons->rx_lock);
+	list_add_tail(&msg->link, &cons->rx_msg_list);
+	spin_unlock(&cons->rx_lock);
+	wake_up(&cons->rx_msg_wq);
+}
+
+/*
+ * Unlink @rx_msg from the list it is on, mark its IPC header/JSM buffers free,
+ * drop the device-wide rx_msg_count and free the tracking structure.
+ */
+static void
+ivpu_ipc_rx_msg_del(struct ivpu_device *vdev, struct ivpu_ipc_rx_msg *rx_msg)
+{
+	struct ivpu_ipc_hdr *hdr = rx_msg->ipc_hdr;
+	struct vpu_jsm_msg *jsm = rx_msg->jsm_msg;
+
+	list_del(&rx_msg->link);
+	ivpu_ipc_rx_mark_free(vdev, hdr, jsm);
+	atomic_dec(&vdev->ipc->rx_msg_count);
+	kfree(rx_msg);
+}
+
+void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+ u32 channel, ivpu_ipc_rx_callback_t rx_callback)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
@@ -148,13 +182,15 @@ ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
cons->channel = channel;
cons->tx_vpu_addr = 0;
cons->request_id = 0;
- spin_lock_init(&cons->rx_msg_lock);
+ cons->aborted = false;
+ cons->rx_callback = rx_callback;
+ spin_lock_init(&cons->rx_lock);
INIT_LIST_HEAD(&cons->rx_msg_list);
init_waitqueue_head(&cons->rx_msg_wq);
- spin_lock_irq(&ipc->cons_list_lock);
+ spin_lock_irq(&ipc->cons_lock);
list_add_tail(&cons->link, &ipc->cons_list);
- spin_unlock_irq(&ipc->cons_list_lock);
+ spin_unlock_irq(&ipc->cons_lock);
}
void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons)
@@ -162,18 +198,14 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_rx_msg *rx_msg, *r;
- spin_lock_irq(&ipc->cons_list_lock);
+ spin_lock_irq(&ipc->cons_lock);
list_del(&cons->link);
- spin_unlock_irq(&ipc->cons_list_lock);
-
- spin_lock_irq(&cons->rx_msg_lock);
- list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) {
- list_del(&rx_msg->link);
- ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
- atomic_dec(&ipc->rx_msg_count);
- kfree(rx_msg);
- }
- spin_unlock_irq(&cons->rx_msg_lock);
+ spin_unlock_irq(&ipc->cons_lock);
+
+ spin_lock_irq(&cons->rx_lock);
+ list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link)
+ ivpu_ipc_rx_msg_del(vdev, rx_msg);
+ spin_unlock_irq(&cons->rx_lock);
ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr);
}
@@ -202,52 +234,61 @@ unlock:
return ret;
}
+/*
+ * Wait condition for ivpu_ipc_receive(): true once the consumer has a queued
+ * message or has been marked aborted. Both are sampled under cons->rx_lock.
+ */
+static bool ivpu_ipc_rx_need_wakeup(struct ivpu_ipc_consumer *cons)
+{
+	bool wake = false;
+
+	spin_lock_irq(&cons->rx_lock);
+	if (cons->aborted || !list_empty(&cons->rx_msg_list))
+		wake = true;
+	spin_unlock_irq(&cons->rx_lock);
+
+	return wake;
+}
+
int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct ivpu_ipc_hdr *ipc_buf,
- struct vpu_jsm_msg *ipc_payload, unsigned long timeout_ms)
+ struct vpu_jsm_msg *jsm_msg, unsigned long timeout_ms)
{
- struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_rx_msg *rx_msg;
int wait_ret, ret = 0;
+ if (drm_WARN_ONCE(&vdev->drm, cons->rx_callback, "Consumer works only in async mode\n"))
+ return -EINVAL;
+
wait_ret = wait_event_timeout(cons->rx_msg_wq,
- (IS_KTHREAD() && kthread_should_stop()) ||
- !list_empty(&cons->rx_msg_list),
+ ivpu_ipc_rx_need_wakeup(cons),
msecs_to_jiffies(timeout_ms));
- if (IS_KTHREAD() && kthread_should_stop())
- return -EINTR;
-
if (wait_ret == 0)
return -ETIMEDOUT;
- spin_lock_irq(&cons->rx_msg_lock);
+ spin_lock_irq(&cons->rx_lock);
+ if (cons->aborted) {
+ spin_unlock_irq(&cons->rx_lock);
+ return -ECANCELED;
+ }
rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link);
if (!rx_msg) {
- spin_unlock_irq(&cons->rx_msg_lock);
+ spin_unlock_irq(&cons->rx_lock);
return -EAGAIN;
}
- list_del(&rx_msg->link);
- spin_unlock_irq(&cons->rx_msg_lock);
if (ipc_buf)
memcpy(ipc_buf, rx_msg->ipc_hdr, sizeof(*ipc_buf));
if (rx_msg->jsm_msg) {
- u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*ipc_payload));
+ u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg));
if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result);
ret = -EBADMSG;
}
- if (ipc_payload)
- memcpy(ipc_payload, rx_msg->jsm_msg, size);
+ if (jsm_msg)
+ memcpy(jsm_msg, rx_msg->jsm_msg, size);
}
- ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
- atomic_dec(&ipc->rx_msg_count);
- kfree(rx_msg);
-
+ ivpu_ipc_rx_msg_del(vdev, rx_msg);
+ spin_unlock_irq(&cons->rx_lock);
return ret;
}
@@ -260,7 +301,7 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req
struct ivpu_ipc_consumer cons;
int ret;
- ivpu_ipc_consumer_add(vdev, &cons, channel);
+ ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
ret = ivpu_ipc_send(vdev, &cons, req);
if (ret) {
@@ -285,23 +326,19 @@ consumer_del:
return ret;
}
-int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
- enum vpu_ipc_msg_type expected_resp_type,
- struct vpu_jsm_msg *resp, u32 channel,
- unsigned long timeout_ms)
+int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+ u32 channel, unsigned long timeout_ms)
{
struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB };
struct vpu_jsm_msg hb_resp;
int ret, hb_ret;
- ret = ivpu_rpm_get(vdev);
- if (ret < 0)
- return ret;
+ drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev));
- ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp_type, resp,
- channel, timeout_ms);
+ ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms);
if (ret != -ETIMEDOUT)
- goto rpm_put;
+ return ret;
hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE,
&hb_resp, VPU_IPC_CHAN_ASYNC_CMD,
@@ -311,7 +348,21 @@ int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
ivpu_pm_schedule_recovery(vdev);
}
-rpm_put:
+ return ret;
+}
+
+int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+ u32 channel, unsigned long timeout_ms)
+{
+ int ret;
+
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
+ ret = ivpu_ipc_send_receive_active(vdev, req, expected_resp, resp, channel, timeout_ms);
+
ivpu_rpm_put(vdev);
return ret;
}
@@ -329,35 +380,7 @@ ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons
return false;
}
-static void
-ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
- struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
-{
- struct ivpu_ipc_info *ipc = vdev->ipc;
- struct ivpu_ipc_rx_msg *rx_msg;
- unsigned long flags;
-
- lockdep_assert_held(&ipc->cons_list_lock);
-
- rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
- if (!rx_msg) {
- ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
- return;
- }
-
- atomic_inc(&ipc->rx_msg_count);
-
- rx_msg->ipc_hdr = ipc_hdr;
- rx_msg->jsm_msg = jsm_msg;
-
- spin_lock_irqsave(&cons->rx_msg_lock, flags);
- list_add_tail(&rx_msg->link, &cons->rx_msg_list);
- spin_unlock_irqrestore(&cons->rx_msg_lock, flags);
-
- wake_up(&cons->rx_msg_wq);
-}
-
-int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
+void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_consumer *cons;
@@ -375,7 +398,7 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev);
if (vpu_addr == REG_IO_ERROR) {
ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n");
- return -EIO;
+ return;
}
ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr);
@@ -405,15 +428,15 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
}
dispatched = false;
- spin_lock_irqsave(&ipc->cons_list_lock, flags);
+ spin_lock_irqsave(&ipc->cons_lock, flags);
list_for_each_entry(cons, &ipc->cons_list, link) {
if (ivpu_ipc_match_consumer(vdev, cons, ipc_hdr, jsm_msg)) {
- ivpu_ipc_dispatch(vdev, cons, ipc_hdr, jsm_msg);
+ ivpu_ipc_rx_msg_add(vdev, cons, ipc_hdr, jsm_msg);
dispatched = true;
break;
}
}
- spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
+ spin_unlock_irqrestore(&ipc->cons_lock, flags);
if (!dispatched) {
ivpu_dbg(vdev, IPC, "IPC RX msg 0x%x dropped (no consumer)\n", vpu_addr);
@@ -421,7 +444,28 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
}
}
- return 0;
+ if (wake_thread)
+ *wake_thread = !list_empty(&ipc->cb_msg_list);
+}
+
+irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev)
+{
+ struct ivpu_ipc_info *ipc = vdev->ipc;
+ struct ivpu_ipc_rx_msg *rx_msg, *r;
+ struct list_head cb_msg_list;
+
+ INIT_LIST_HEAD(&cb_msg_list);
+
+ spin_lock_irq(&ipc->cons_lock);
+ list_splice_tail_init(&ipc->cb_msg_list, &cb_msg_list);
+ spin_unlock_irq(&ipc->cons_lock);
+
+ list_for_each_entry_safe(rx_msg, r, &cb_msg_list, link) {
+ rx_msg->callback(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
+ ivpu_ipc_rx_msg_del(vdev, rx_msg);
+ }
+
+ return IRQ_HANDLED;
}
int ivpu_ipc_init(struct ivpu_device *vdev)
@@ -456,10 +500,10 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
goto err_free_rx;
}
+ spin_lock_init(&ipc->cons_lock);
INIT_LIST_HEAD(&ipc->cons_list);
- spin_lock_init(&ipc->cons_list_lock);
+ INIT_LIST_HEAD(&ipc->cb_msg_list);
drmm_mutex_init(&vdev->drm, &ipc->lock);
-
ivpu_ipc_reset(vdev);
return 0;
@@ -472,6 +516,13 @@ err_free_tx:
void ivpu_ipc_fini(struct ivpu_device *vdev)
{
+ struct ivpu_ipc_info *ipc = vdev->ipc;
+
+ drm_WARN_ON(&vdev->drm, ipc->on);
+ drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list));
+ drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list));
+ drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0);
+
ivpu_ipc_mem_fini(vdev);
}
@@ -488,16 +539,27 @@ void ivpu_ipc_disable(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_consumer *cons, *c;
- unsigned long flags;
+ struct ivpu_ipc_rx_msg *rx_msg, *r;
+
+ drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list));
mutex_lock(&ipc->lock);
ipc->on = false;
mutex_unlock(&ipc->lock);
- spin_lock_irqsave(&ipc->cons_list_lock, flags);
- list_for_each_entry_safe(cons, c, &ipc->cons_list, link)
+ spin_lock_irq(&ipc->cons_lock);
+ list_for_each_entry_safe(cons, c, &ipc->cons_list, link) {
+ spin_lock(&cons->rx_lock);
+ if (!cons->rx_callback)
+ cons->aborted = true;
+ list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link)
+ ivpu_ipc_rx_msg_del(vdev, rx_msg);
+ spin_unlock(&cons->rx_lock);
wake_up(&cons->rx_msg_wq);
- spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
+ }
+ spin_unlock_irq(&ipc->cons_lock);
+
+ drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0);
}
void ivpu_ipc_reset(struct ivpu_device *vdev)
@@ -505,6 +567,7 @@ void ivpu_ipc_reset(struct ivpu_device *vdev)
struct ivpu_ipc_info *ipc = vdev->ipc;
mutex_lock(&ipc->lock);
+ drm_WARN_ON(&vdev->drm, ipc->on);
memset(ivpu_bo_vaddr(ipc->mem_tx), 0, ivpu_bo_size(ipc->mem_tx));
memset(ivpu_bo_vaddr(ipc->mem_rx), 0, ivpu_bo_size(ipc->mem_rx));
diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h
index 68f5b6668e00..40ca3cc4e61f 100644
--- a/drivers/accel/ivpu/ivpu_ipc.h
+++ b/drivers/accel/ivpu/ivpu_ipc.h
@@ -42,13 +42,26 @@ struct ivpu_ipc_hdr {
u8 status;
} __packed __aligned(IVPU_IPC_ALIGNMENT);
+typedef void (*ivpu_ipc_rx_callback_t)(struct ivpu_device *vdev,
+ struct ivpu_ipc_hdr *ipc_hdr,
+ struct vpu_jsm_msg *jsm_msg);
+
+struct ivpu_ipc_rx_msg {
+ struct list_head link;
+ struct ivpu_ipc_hdr *ipc_hdr;
+ struct vpu_jsm_msg *jsm_msg;
+ ivpu_ipc_rx_callback_t callback;
+};
+
struct ivpu_ipc_consumer {
struct list_head link;
u32 channel;
u32 tx_vpu_addr;
u32 request_id;
+ bool aborted;
+ ivpu_ipc_rx_callback_t rx_callback;
- spinlock_t rx_msg_lock; /* Protects rx_msg_list */
+ spinlock_t rx_lock; /* Protects rx_msg_list and aborted */
struct list_head rx_msg_list;
wait_queue_head_t rx_msg_wq;
};
@@ -60,8 +73,9 @@ struct ivpu_ipc_info {
atomic_t rx_msg_count;
- spinlock_t cons_list_lock; /* Protects cons_list */
+ spinlock_t cons_lock; /* Protects cons_list and cb_msg_list */
struct list_head cons_list;
+ struct list_head cb_msg_list;
atomic_t request_id;
struct mutex lock; /* Lock on status */
@@ -75,19 +89,22 @@ void ivpu_ipc_enable(struct ivpu_device *vdev);
void ivpu_ipc_disable(struct ivpu_device *vdev);
void ivpu_ipc_reset(struct ivpu_device *vdev);
-int ivpu_ipc_irq_handler(struct ivpu_device *vdev);
+void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread);
+irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev);
void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
- u32 channel);
+ u32 channel, ivpu_ipc_rx_callback_t callback);
void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons);
int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
- struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *ipc_payload,
+ struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *jsm_msg,
unsigned long timeout_ms);
+int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+ u32 channel, unsigned long timeout_ms);
int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
- enum vpu_ipc_msg_type expected_resp_type,
- struct vpu_jsm_msg *resp, u32 channel,
- unsigned long timeout_ms);
+ enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+ u32 channel, unsigned long timeout_ms);
#endif /* __IVPU_IPC_H__ */
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 8983e3a4fdf9..7206cf9cdb4a 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -7,7 +7,6 @@
#include <linux/bitfield.h>
#include <linux/highmem.h>
-#include <linux/kthread.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <uapi/drm/ivpu_accel.h>
@@ -24,10 +23,6 @@
#define JOB_ID_CONTEXT_MASK GENMASK(31, 8)
#define JOB_MAX_BUFFER_COUNT 65535
-static unsigned int ivpu_tdr_timeout_ms;
-module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644);
-MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
-
static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
{
ivpu_hw_reg_db_set(vdev, cmdq->db_id);
@@ -196,6 +191,8 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
entry->batch_buf_addr = job->cmd_buf_vpu_addr;
entry->job_id = job->job_id;
entry->flags = 0;
+ if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION))
+ entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;
wmb(); /* Ensure that tail is updated after filling entry */
header->tail = next_entry;
wmb(); /* Flush WC buffer for jobq header */
@@ -264,7 +261,7 @@ static void job_release(struct kref *ref)
for (i = 0; i < job->bo_count; i++)
if (job->bos[i])
- drm_gem_object_put(&job->bos[i]->base);
+ drm_gem_object_put(&job->bos[i]->base.base);
dma_fence_put(job->done_fence);
ivpu_file_priv_put(&job->file_priv);
@@ -340,23 +337,12 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n",
job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
+ ivpu_stop_job_timeout_detection(vdev);
+
job_put(job);
return 0;
}
-static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg)
-{
- struct vpu_ipc_msg_payload_job_done *payload;
- struct vpu_jsm_msg *job_ret_msg = msg;
- int ret;
-
- payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload;
-
- ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
- if (ret)
- ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret);
-}
-
void ivpu_jobs_abort_all(struct ivpu_device *vdev)
{
struct ivpu_job *job;
@@ -398,11 +384,13 @@ static int ivpu_direct_job_submission(struct ivpu_job *job)
if (ret)
goto err_xa_erase;
+ ivpu_start_job_timeout_detection(vdev);
+
ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n",
job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id,
job->engine_idx, cmdq->jobq->header.tail);
- if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) {
+ if (ivpu_test_mode & IVPU_TEST_MODE_NULL_HW) {
ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
cmdq->jobq->header.head = cmdq->jobq->header.tail;
wmb(); /* Flush WC buffer for jobq header */
@@ -448,7 +436,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
}
bo = job->bos[CMD_BUF_IDX];
- if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) {
+ if (!dma_resv_test_signaled(bo->base.base.resv, DMA_RESV_USAGE_READ)) {
ivpu_warn(vdev, "Buffer is already in use\n");
return -EBUSY;
}
@@ -468,7 +456,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
}
for (i = 0; i < buf_count; i++) {
- ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1);
+ ret = dma_resv_reserve_fences(job->bos[i]->base.base.resv, 1);
if (ret) {
ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
goto unlock_reservations;
@@ -477,7 +465,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
for (i = 0; i < buf_count; i++) {
usage = (i == CMD_BUF_IDX) ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_BOOKKEEP;
- dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, usage);
+ dma_resv_add_fence(job->bos[i]->base.base.resv, job->done_fence, usage);
}
unlock_reservations:
@@ -562,61 +550,36 @@ free_handles:
return ret;
}
-static int ivpu_job_done_thread(void *arg)
+static void
+ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
+ struct vpu_jsm_msg *jsm_msg)
{
- struct ivpu_device *vdev = (struct ivpu_device *)arg;
- struct ivpu_ipc_consumer cons;
- struct vpu_jsm_msg jsm_msg;
- bool jobs_submitted;
- unsigned int timeout;
+ struct vpu_ipc_msg_payload_job_done *payload;
int ret;
- ivpu_dbg(vdev, JOB, "Started %s\n", __func__);
-
- ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET);
-
- while (!kthread_should_stop()) {
- timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
- jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa);
- ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout);
- if (!ret) {
- ivpu_job_done_message(vdev, &jsm_msg);
- } else if (ret == -ETIMEDOUT) {
- if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) {
- ivpu_err(vdev, "TDR detected, timeout %d ms", timeout);
- ivpu_hw_diagnose_failure(vdev);
- ivpu_pm_schedule_recovery(vdev);
- }
- }
+ if (!jsm_msg) {
+ ivpu_err(vdev, "IPC message has no JSM payload\n");
+ return;
}
- ivpu_ipc_consumer_del(vdev, &cons);
-
- ivpu_jobs_abort_all(vdev);
+ if (jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
+ ivpu_err(vdev, "Invalid JSM message result: %d\n", jsm_msg->result);
+ return;
+ }
- ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__);
- return 0;
+ payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
+ ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
+ if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
+ ivpu_start_job_timeout_detection(vdev);
}
-int ivpu_job_done_thread_init(struct ivpu_device *vdev)
+void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
{
- struct task_struct *thread;
-
- thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread");
- if (IS_ERR(thread)) {
- ivpu_err(vdev, "Failed to start job completion thread\n");
- return -EIO;
- }
-
- get_task_struct(thread);
- wake_up_process(thread);
-
- vdev->job_done_thread = thread;
-
- return 0;
+ ivpu_ipc_consumer_add(vdev, &vdev->job_done_consumer,
+ VPU_IPC_CHAN_JOB_RET, ivpu_job_done_callback);
}
-void ivpu_job_done_thread_fini(struct ivpu_device *vdev)
+void ivpu_job_done_consumer_fini(struct ivpu_device *vdev)
{
- kthread_stop_put(vdev->job_done_thread);
+ ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
}
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
index 5514c2d8a609..45a2f2ec82e5 100644
--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -59,8 +59,8 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv);
void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
-int ivpu_job_done_thread_init(struct ivpu_device *vdev);
-void ivpu_job_done_thread_fini(struct ivpu_device *vdev);
+void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
+void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
void ivpu_jobs_abort_all(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
index 0c2fe7142024..8cea0dd731b9 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.c
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
@@ -4,6 +4,7 @@
*/
#include "ivpu_drv.h"
+#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_jsm_msg.h"
@@ -36,6 +37,17 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
IVPU_CASE_TO_STR(VPU_JSM_MSG_DESTROY_CMD_QUEUE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES);
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_REGISTER_DB);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_ENGINE_RESUME);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL);
IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE);
@@ -65,6 +77,12 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL_RSP);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER_DONE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE_DONE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE_DONE);
}
#undef IVPU_CASE_TO_STR
@@ -243,3 +261,23 @@ int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid)
return ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
}
+
+int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_PWR_D0I3_ENTER };
+ struct vpu_jsm_msg resp;
+ int ret;
+
+ if (IVPU_WA(disable_d0i3_msg))
+ return 0;
+
+ req.payload.pwr_d0i3_enter.send_response = 1;
+
+ ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE,
+ &resp, VPU_IPC_CHAN_GEN_CMD,
+ vdev->timeout.d0i3_entry_msg);
+ if (ret)
+ return ret;
+
+ return ivpu_hw_wait_for_idle(vdev);
+}
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h
index 66979a948c7c..ae75e5dbcc41 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.h
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.h
@@ -22,4 +22,5 @@ int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destinati
int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask,
u64 trace_hw_component_mask);
int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid);
+int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev);
#endif
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 2538c78fbebe..2228c44b115f 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -230,7 +230,12 @@
(REG_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT)) | \
(REG_FLD(IVPU_MMU_REG_GERROR, MSI_ABT)))
-static char *ivpu_mmu_event_to_str(u32 cmd)
+#define IVPU_MMU_CERROR_NONE 0x0
+#define IVPU_MMU_CERROR_ILL 0x1
+#define IVPU_MMU_CERROR_ABT 0x2
+#define IVPU_MMU_CERROR_ATC_INV_SYNC 0x3
+
+static const char *ivpu_mmu_event_to_str(u32 cmd)
{
switch (cmd) {
case IVPU_MMU_EVT_F_UUT:
@@ -276,6 +281,22 @@ static char *ivpu_mmu_event_to_str(u32 cmd)
}
}
+static const char *ivpu_mmu_cmdq_err_to_str(u32 err)
+{
+ switch (err) {
+ case IVPU_MMU_CERROR_NONE:
+ return "No CMDQ Error";
+ case IVPU_MMU_CERROR_ILL:
+ return "Illegal command";
+ case IVPU_MMU_CERROR_ABT:
+ return "External abort on CMDQ read";
+ case IVPU_MMU_CERROR_ATC_INV_SYNC:
+ return "Sync failed to complete ATS invalidation";
+ default:
+ return "Unknown CMDQ Error";
+ }
+}
+
static void ivpu_mmu_config_check(struct ivpu_device *vdev)
{
u32 val_ref;
@@ -479,10 +500,7 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
u64 val;
int ret;
- val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC) |
- FIELD_PREP(IVPU_MMU_CMD_SYNC_0_CS, 0x2) |
- FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSH, 0x3) |
- FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSI_ATTR, 0xf);
+ val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC);
ret = ivpu_mmu_cmdq_cmd_write(vdev, "SYNC", val, 0);
if (ret)
@@ -492,8 +510,15 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, q->prod);
ret = ivpu_mmu_cmdq_wait_for_cons(vdev);
- if (ret)
- ivpu_err(vdev, "Timed out waiting for consumer: %d\n", ret);
+ if (ret) {
+ u32 err;
+
+ val = REGV_RD32(IVPU_MMU_REG_CMDQ_CONS);
+ err = REG_GET_FLD(IVPU_MMU_REG_CMDQ_CONS, ERR, val);
+
+ ivpu_err(vdev, "Timed out waiting for MMU consumer: %d, error: %s\n", ret,
+ ivpu_mmu_cmdq_err_to_str(err));
+ }
return ret;
}
@@ -750,9 +775,12 @@ int ivpu_mmu_init(struct ivpu_device *vdev)
ivpu_dbg(vdev, MMU, "Init..\n");
- drmm_mutex_init(&vdev->drm, &mmu->lock);
ivpu_mmu_config_check(vdev);
+ ret = drmm_mutex_init(&vdev->drm, &mmu->lock);
+ if (ret)
+ return ret;
+
ret = ivpu_mmu_structs_alloc(vdev);
if (ret)
return ret;
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index c1050a2df954..12a8c09d4547 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -5,6 +5,9 @@
#include <linux/bitfield.h>
#include <linux/highmem.h>
+#include <linux/set_memory.h>
+
+#include <drm/drm_cache.h>
#include "ivpu_drv.h"
#include "ivpu_hw.h"
@@ -39,12 +42,57 @@
#define IVPU_MMU_ENTRY_MAPPED (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \
IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
+static void *ivpu_pgtable_alloc_page(struct ivpu_device *vdev, dma_addr_t *dma)
+{
+ dma_addr_t dma_addr;
+ struct page *page;
+ void *cpu;
+
+ page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+ if (!page)
+ return NULL;
+
+ set_pages_array_wc(&page, 1);
+
+ dma_addr = dma_map_page(vdev->drm.dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(vdev->drm.dev, dma_addr))
+ goto err_free_page;
+
+ cpu = vmap(&page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+ if (!cpu)
+ goto err_dma_unmap_page;
+
+
+ *dma = dma_addr;
+ return cpu;
+
+err_dma_unmap_page:
+ dma_unmap_page(vdev->drm.dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+err_free_page:
+ put_page(page);
+ return NULL;
+}
+
+static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
+{
+ struct page *page;
+
+ if (cpu_addr) {
+ page = vmalloc_to_page(cpu_addr);
+ vunmap(cpu_addr);
+ dma_unmap_page(vdev->drm.dev, dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ set_pages_array_wb(&page, 1);
+ put_page(page);
+ }
+}
+
static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
dma_addr_t pgd_dma;
- pgtable->pgd_dma_ptr = dma_alloc_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma,
- GFP_KERNEL);
+ pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma);
if (!pgtable->pgd_dma_ptr)
return -ENOMEM;
@@ -53,13 +101,6 @@ static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtab
return 0;
}
-static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
-{
- if (cpu_addr)
- dma_free_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, cpu_addr,
- dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK);
-}
-
static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
int pgd_idx, pud_idx, pmd_idx;
@@ -84,19 +125,19 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgt
pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
pte_dma = pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx];
- ivpu_mmu_pgtable_free(vdev, pte_dma_ptr, pte_dma);
+ ivpu_pgtable_free_page(vdev, pte_dma_ptr, pte_dma);
}
kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]);
- ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
+ ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
}
kfree(pgtable->pmd_ptrs[pgd_idx]);
kfree(pgtable->pte_ptrs[pgd_idx]);
- ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+ ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
}
- ivpu_mmu_pgtable_free(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
+ ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
}
static u64*
@@ -108,7 +149,7 @@ ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
if (pud_dma_ptr)
return pud_dma_ptr;
- pud_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pud_dma, GFP_KERNEL);
+ pud_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pud_dma);
if (!pud_dma_ptr)
return NULL;
@@ -131,7 +172,7 @@ err_free_pmd_ptrs:
kfree(pgtable->pmd_ptrs[pgd_idx]);
err_free_pud_dma_ptr:
- ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+ ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
return NULL;
}
@@ -145,7 +186,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
if (pmd_dma_ptr)
return pmd_dma_ptr;
- pmd_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
+ pmd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pmd_dma);
if (!pmd_dma_ptr)
return NULL;
@@ -160,7 +201,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
return pmd_dma_ptr;
err_free_pmd_dma_ptr:
- ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
+ ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
return NULL;
}
@@ -174,7 +215,7 @@ ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
if (pte_dma_ptr)
return pte_dma_ptr;
- pte_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
+ pte_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pte_dma);
if (!pte_dma_ptr)
return NULL;
@@ -249,38 +290,6 @@ static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_ad
ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = IVPU_MMU_ENTRY_INVALID;
}
-static void
-ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
-{
- struct ivpu_mmu_pgtable *pgtable = &ctx->pgtable;
- u64 end_addr = vpu_addr + size;
-
- /* Align to PMD entry (2 MB) */
- vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
-
- while (vpu_addr < end_addr) {
- int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
- u64 pud_end = (pgd_idx + 1) * (u64)IVPU_MMU_PUD_MAP_SIZE;
-
- while (vpu_addr < end_addr && vpu_addr < pud_end) {
- int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
- u64 pmd_end = (pud_idx + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
-
- while (vpu_addr < end_addr && vpu_addr < pmd_end) {
- int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
-
- clflush_cache_range(pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx],
- IVPU_MMU_PGTABLE_SIZE);
- vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
- }
- clflush_cache_range(pgtable->pmd_ptrs[pgd_idx][pud_idx],
- IVPU_MMU_PGTABLE_SIZE);
- }
- clflush_cache_range(pgtable->pud_ptrs[pgd_idx], IVPU_MMU_PGTABLE_SIZE);
- }
- clflush_cache_range(pgtable->pgd_dma_ptr, IVPU_MMU_PGTABLE_SIZE);
-}
-
static int
ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 prot)
@@ -327,6 +336,9 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 prot;
u64 i;
+ if (drm_WARN_ON(&vdev->drm, !ctx))
+ return -EINVAL;
+
if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
return -EINVAL;
@@ -349,10 +361,11 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
mutex_unlock(&ctx->lock);
return ret;
}
- ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
vpu_addr += size;
}
+ /* Ensure page table modifications are flushed from wc buffers to memory */
+ wmb();
mutex_unlock(&ctx->lock);
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
@@ -369,8 +382,8 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
int ret;
u64 i;
- if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
- ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr);
+ if (drm_WARN_ON(&vdev->drm, !ctx))
+ return;
mutex_lock(&ctx->lock);
@@ -378,10 +391,11 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
size_t size = sg_dma_len(sg) + sg->offset;
ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
- ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
vpu_addr += size;
}
+ /* Ensure page table modifications are flushed from wc buffers to memory */
+ wmb();
mutex_unlock(&ctx->lock);
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
@@ -390,28 +404,34 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
}
int
-ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
- const struct ivpu_addr_range *range,
- u64 size, struct drm_mm_node *node)
+ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
+ u64 size, struct drm_mm_node *node)
{
- lockdep_assert_held(&ctx->lock);
+ int ret;
+
+ WARN_ON(!range);
+ mutex_lock(&ctx->lock);
if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE) {
- if (!drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
- range->start, range->end, DRM_MM_INSERT_BEST))
- return 0;
+ ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
+ range->start, range->end, DRM_MM_INSERT_BEST);
+ if (!ret)
+ goto unlock;
}
- return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
- range->start, range->end, DRM_MM_INSERT_BEST);
+ ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
+ range->start, range->end, DRM_MM_INSERT_BEST);
+unlock:
+ mutex_unlock(&ctx->lock);
+ return ret;
}
void
-ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
+ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
{
- lockdep_assert_held(&ctx->lock);
-
+ mutex_lock(&ctx->lock);
drm_mm_remove_node(node);
+ mutex_unlock(&ctx->lock);
}
static int
@@ -421,7 +441,6 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3
int ret;
mutex_init(&ctx->lock);
- INIT_LIST_HEAD(&ctx->bo_list);
ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
if (ret) {
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h
index f15d8c630d8a..535db3a1fc74 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.h
+++ b/drivers/accel/ivpu/ivpu_mmu_context.h
@@ -23,10 +23,9 @@ struct ivpu_mmu_pgtable {
};
struct ivpu_mmu_context {
- struct mutex lock; /* protects: mm, pgtable, bo_list */
+ struct mutex lock; /* Protects: mm, pgtable */
struct drm_mm mm;
struct ivpu_mmu_pgtable pgtable;
- struct list_head bo_list;
u32 id;
};
@@ -39,11 +38,9 @@ int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context
void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid);
-int ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
- const struct ivpu_addr_range *range,
- u64 size, struct drm_mm_node *node);
-void ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx,
- struct drm_mm_node *node);
+int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
+ u64 size, struct drm_mm_node *node);
+void ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node);
int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt, bool llc_coherent);
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index e9b16cbc26f4..0af8864cb3b5 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -15,6 +15,7 @@
#include "ivpu_fw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
+#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
#include "ivpu_pm.h"
@@ -22,6 +23,10 @@ static bool ivpu_disable_recovery;
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected");
+static unsigned long ivpu_tdr_timeout_ms;
+module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
+MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
+
#define PM_RESCHEDULE_LIMIT 5
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
@@ -69,27 +74,31 @@ retry:
ret = ivpu_hw_power_up(vdev);
if (ret) {
ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
- return ret;
+ goto err_power_down;
}
ret = ivpu_mmu_enable(vdev);
if (ret) {
ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
- ivpu_hw_power_down(vdev);
- return ret;
+ goto err_power_down;
}
ret = ivpu_boot(vdev);
- if (ret) {
- ivpu_mmu_disable(vdev);
- ivpu_hw_power_down(vdev);
- if (!ivpu_fw_is_cold_boot(vdev)) {
- ivpu_warn(vdev, "Failed to resume the FW: %d. Retrying cold boot..\n", ret);
- ivpu_pm_prepare_cold_boot(vdev);
- goto retry;
- } else {
- ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
- }
+ if (ret)
+ goto err_mmu_disable;
+
+ return 0;
+
+err_mmu_disable:
+ ivpu_mmu_disable(vdev);
+err_power_down:
+ ivpu_hw_power_down(vdev);
+
+ if (!ivpu_fw_is_cold_boot(vdev)) {
+ ivpu_pm_prepare_cold_boot(vdev);
+ goto retry;
+ } else {
+ ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
}
return ret;
@@ -136,6 +145,31 @@ void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
}
}
+static void ivpu_job_timeout_work(struct work_struct *work)
+{
+ struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
+ struct ivpu_device *vdev = pm->vdev;
+ unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+
+ ivpu_err(vdev, "TDR detected, timeout %lu ms", timeout_ms);
+ ivpu_hw_diagnose_failure(vdev);
+
+ ivpu_pm_schedule_recovery(vdev);
+}
+
+void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
+{
+ unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+
+ /* No-op if already queued */
+ queue_delayed_work(system_wq, &vdev->pm->job_timeout_work, msecs_to_jiffies(timeout_ms));
+}
+
+void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
+{
+ cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
+}
+
int ivpu_pm_suspend_cb(struct device *dev)
{
struct drm_device *drm = dev_get_drvdata(dev);
@@ -153,6 +187,8 @@ int ivpu_pm_suspend_cb(struct device *dev)
}
}
+ ivpu_jsm_pwr_d0i3_enter(vdev);
+
ivpu_suspend(vdev);
ivpu_pm_prepare_warm_boot(vdev);
@@ -188,6 +224,7 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
{
struct drm_device *drm = dev_get_drvdata(dev);
struct ivpu_device *vdev = to_ivpu_device(drm);
+ bool hw_is_idle = true;
int ret;
ivpu_dbg(vdev, PM, "Runtime suspend..\n");
@@ -200,11 +237,16 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
return -EAGAIN;
}
+ if (!vdev->pm->suspend_reschedule_counter)
+ hw_is_idle = false;
+ else if (ivpu_jsm_pwr_d0i3_enter(vdev))
+ hw_is_idle = false;
+
ret = ivpu_suspend(vdev);
if (ret)
ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret);
- if (!vdev->pm->suspend_reschedule_counter) {
+ if (!hw_is_idle) {
ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n");
ivpu_pm_prepare_cold_boot(vdev);
} else {
@@ -304,6 +346,7 @@ void ivpu_pm_init(struct ivpu_device *vdev)
atomic_set(&pm->in_reset, 0);
INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
+ INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work);
if (ivpu_disable_recovery)
delay = -1;
@@ -318,6 +361,7 @@ void ivpu_pm_init(struct ivpu_device *vdev)
void ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
{
+ drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work));
cancel_work_sync(&vdev->pm->recovery_work);
}
diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h
index 044db150be07..97c6e0b0aa42 100644
--- a/drivers/accel/ivpu/ivpu_pm.h
+++ b/drivers/accel/ivpu/ivpu_pm.h
@@ -12,6 +12,7 @@ struct ivpu_device;
struct ivpu_pm_info {
struct ivpu_device *vdev;
+ struct delayed_work job_timeout_work;
struct work_struct recovery_work;
atomic_t in_reset;
atomic_t reset_counter;
@@ -37,5 +38,7 @@ int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev);
void ivpu_rpm_put(struct ivpu_device *vdev);
void ivpu_pm_schedule_recovery(struct ivpu_device *vdev);
+void ivpu_start_job_timeout_detection(struct ivpu_device *vdev);
+void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev);
#endif /* __IVPU_PM_H__ */
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h
index 6b71be92ba65..04c954258563 100644
--- a/drivers/accel/ivpu/vpu_boot_api.h
+++ b/drivers/accel/ivpu/vpu_boot_api.h
@@ -11,7 +11,10 @@
* The bellow values will be used to construct the version info this way:
* fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) |
* VPU_BOOT_API_VER_MINOR;
- * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes.
+ * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes.
+ * This information is collected by using vpuip_2/application/vpuFirmware/make_std_fw_image.py
+ * If a header is missing this info we ignore the header, if a header is missing or contains
+ * partial info a build error will be generated.
*/
/*
@@ -24,12 +27,12 @@
* Minor version changes when API backward compatibility is preserved.
* Resets to 0 if Major version is incremented.
*/
-#define VPU_BOOT_API_VER_MINOR 12
+#define VPU_BOOT_API_VER_MINOR 20
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
-#define VPU_BOOT_API_VER_PATCH 2
+#define VPU_BOOT_API_VER_PATCH 4
/*
* Index in the API version table
@@ -63,6 +66,12 @@ struct vpu_firmware_header {
/* Size of memory require for firmware execution */
u32 runtime_size;
u32 shave_nn_fw_size;
+ /* Size of primary preemption buffer. */
+ u32 preemption_buffer_1_size;
+ /* Size of secondary preemption buffer. */
+ u32 preemption_buffer_2_size;
+ /* Space reserved for future preemption-related fields. */
+ u32 preemption_reserved[6];
};
/*
@@ -89,6 +98,14 @@ enum VPU_BOOT_L2_CACHE_CFG_TYPE {
VPU_BOOT_L2_CACHE_CFG_NUM = 2
};
+/** VPU MCA ECC signalling mode. By default, no signalling is used */
+enum VPU_BOOT_MCA_ECC_SIGNAL_TYPE {
+ VPU_BOOT_MCA_ECC_NONE = 0,
+ VPU_BOOT_MCA_ECC_CORR = 1,
+ VPU_BOOT_MCA_ECC_FATAL = 2,
+ VPU_BOOT_MCA_ECC_BOTH = 3
+};
+
/**
* Logging destinations.
*
@@ -131,9 +148,11 @@ enum vpu_trace_destination {
#define VPU_TRACE_PROC_BIT_ACT_SHV_3 22
#define VPU_TRACE_PROC_NO_OF_HW_DEVS 23
-/* KMB HW component IDs are sequential, so define first and last IDs. */
-#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_LRT
-#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_SHV_15
+/* VPU 30xx HW component IDs are sequential, so define first and last IDs. */
+#define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT
+#define VPU_TRACE_PROC_BIT_30XX_LAST VPU_TRACE_PROC_BIT_SHV_15
+#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_30XX_FIRST
+#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_30XX_LAST
struct vpu_boot_l2_cache_config {
u8 use;
@@ -148,6 +167,25 @@ struct vpu_warm_boot_section {
u32 is_clear_op;
};
+/*
+ * When HW scheduling mode is enabled, a present period is defined.
+ * It will be used by VPU to swap between normal and focus priorities
+ * to prevent starving of normal priority band (when implemented).
+ * Host must provide a valid value at boot time in
+ * `vpu_focus_present_timer_ms`. If the value provided by the host is not within the
+ * defined range a default value will be used. Here we define the min. and max.
+ * allowed values and the default value of the present period. Units are milliseconds.
+ */
+#define VPU_PRESENT_CALL_PERIOD_MS_DEFAULT 50
+#define VPU_PRESENT_CALL_PERIOD_MS_MIN 16
+#define VPU_PRESENT_CALL_PERIOD_MS_MAX 10000
+
+/**
+ * Macros to enable various operation modes within the VPU.
+ * To be defined as part of 32 bit mask.
+ */
+#define VPU_OP_MODE_SURVIVABILITY 0x1
+
struct vpu_boot_params {
u32 magic;
u32 vpu_id;
@@ -218,6 +256,7 @@ struct vpu_boot_params {
* the threshold will not be logged); applies to every enabled logging
* destination and loggable HW component. See 'mvLog_t' enum for acceptable
* values.
+ * TODO: EISW-33556: Move log level definition (mvLog_t) to this file.
*/
u32 default_trace_level;
u32 boot_type;
@@ -249,7 +288,36 @@ struct vpu_boot_params {
u32 temp_sensor_period_ms;
/** PLL ratio for efficient clock frequency */
u32 pn_freq_pll_ratio;
- u32 pad4[28];
+ /** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */
+ u32 dvfs_mode;
+ /**
+ * Depending on DVFS Mode:
+ * On-demand: Default if 0.
+ * Bit 0-7 - uint8_t: Highest residency percent
+ * Bit 8-15 - uint8_t: High residency percent
+ * Bit 16-23 - uint8_t: Low residency percent
+ * Bit 24-31 - uint8_t: Lowest residency percent
+ * Bit 32-35 - unsigned 4b: PLL Ratio increase amount on highest residency
+ * Bit 36-39 - unsigned 4b: PLL Ratio increase amount on high residency
+ * Bit 40-43 - unsigned 4b: PLL Ratio decrease amount on low residency
+ * Bit 44-47 - unsigned 4b: PLL Ratio decrease amount on lowest frequency
+ * Bit 48-55 - uint8_t: Period (ms) for residency decisions
+ * Bit 56-63 - uint8_t: Averaging windows (as multiples of period. Max: 30 decimal)
+ * Power Save/Max Performance: Unused
+ */
+ u64 dvfs_param;
+ /**
+ * D0i3 delayed entry
+ * Bit0: Disable CPU state save on D0i2 entry flow.
+ * 0: Every D0i2 entry saves state. Save state IPC message ignored.
+ * 1: IPC message required to save state on D0i3 entry flow.
+ */
+ u32 d0i3_delayed_entry;
+ /* Time spent by VPU in D0i3 state */
+ u64 d0i3_residency_time_us;
+ /* Value of VPU perf counter at the time of entering D0i3 state. */
+ u64 d0i3_entry_vpu_ts;
+ u32 pad4[20];
/* Warm boot information: 0x400 - 0x43F */
u32 warm_boot_sections_count;
u32 warm_boot_start_address_reference;
@@ -274,8 +342,12 @@ struct vpu_boot_params {
u32 vpu_scheduling_mode;
/* Present call period in milliseconds. */
u32 vpu_focus_present_timer_ms;
- /* Unused/reserved: 0x478 - 0xFFF */
- u32 pad6[738];
+ /* VPU ECC Signaling */
+ u32 vpu_uses_ecc_mca_signal;
+ /* Values defined by VPU_OP_MODE* macros */
+ u32 vpu_operation_mode;
+ /* Unused/reserved: 0x480 - 0xFFF */
+ u32 pad6[736];
};
/*
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index 2949ec8365bd..7da7622742be 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -22,12 +22,12 @@
/*
* Minor version changes when API backward compatibility is preserved.
*/
-#define VPU_JSM_API_VER_MINOR 0
+#define VPU_JSM_API_VER_MINOR 15
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
-#define VPU_JSM_API_VER_PATCH 1
+#define VPU_JSM_API_VER_PATCH 0
/*
* Index in the API version table
@@ -84,11 +84,13 @@
* Job flags bit masks.
*/
#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001
+#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK 0xFF000000
/*
* Sizes of the reserved areas in jobs, in bytes.
*/
-#define VPU_JOB_RESERVED_BYTES 16
+#define VPU_JOB_RESERVED_BYTES 8
+
/*
* Sizes of the reserved areas in job queues, in bytes.
*/
@@ -109,6 +111,20 @@
#define VPU_DYNDBG_CMD_MAX_LEN 96
/*
+ * For HWS command queue scheduling, we can prioritise command queues inside the
+ * same process with a relative in-process priority. Valid values for relative
+ * priority are given below - max and min.
+ */
+#define VPU_HWS_COMMAND_QUEUE_MAX_IN_PROCESS_PRIORITY 7
+#define VPU_HWS_COMMAND_QUEUE_MIN_IN_PROCESS_PRIORITY -7
+
+/*
+ * For HWS priority scheduling, we can have multiple realtime priority bands.
+ * They are numbered 0 to a MAX.
+ */
+#define VPU_HWS_MAX_REALTIME_PRIORITY_LEVEL 31U
+
+/*
* Job format.
*/
struct vpu_job_queue_entry {
@@ -117,8 +133,14 @@ struct vpu_job_queue_entry {
u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */
u64 root_page_table_addr; /**< Address of root page table to use for this job */
u64 root_page_table_update_counter; /**< Page tables update events counter */
- u64 preemption_buffer_address; /**< Address of the preemption buffer to use for this job */
- u64 preemption_buffer_size; /**< Size of the preemption buffer to use for this job */
+ u64 primary_preempt_buf_addr;
+ /**< Address of the primary preemption buffer to use for this job */
+ u32 primary_preempt_buf_size;
+ /**< Size of the primary preemption buffer to use for this job */
+ u32 secondary_preempt_buf_size;
+ /**< Size of secondary preemption buffer to use for this job */
+ u64 secondary_preempt_buf_addr;
+ /**< Address of secondary preemption buffer to use for this job */
u8 reserved_0[VPU_JOB_RESERVED_BYTES];
};
@@ -153,6 +175,46 @@ enum vpu_trace_entity_type {
};
/*
+ * HWS specific log buffer header details.
+ * Total size is 32 bytes.
+ */
+struct vpu_hws_log_buffer_header {
+ /* Written by VPU after adding a log entry. Initialised by host to 0. */
+ u32 first_free_entry_index;
+ /* Incremented by VPU every time the VPU overwrites the 0th entry;
+ * initialised by host to 0.
+ */
+ u32 wraparound_count;
+ /*
+ * This is the number of buffers that can be stored in the log buffer provided by the host.
+ * It is written by host before passing buffer to VPU. VPU should consider it read-only.
+ */
+ u64 num_of_entries;
+ u64 reserved[2];
+};
+
+/*
+ * HWS specific log buffer entry details.
+ * Total size is 32 bytes.
+ */
+struct vpu_hws_log_buffer_entry {
+ /* VPU timestamp must be an invariant timer tick (not impacted by DVFS) */
+ u64 vpu_timestamp;
+ /*
+ * Operation type:
+ * 0 - context state change
+ * 1 - queue new work
+ * 2 - queue unwait sync object
+ * 3 - queue no more work
+ * 4 - queue wait sync object
+ */
+ u32 operation_type;
+ u32 reserved;
+ /* Operation data depends on operation type */
+ u64 operation_data[2];
+};
+
+/*
* Host <-> VPU IPC messages types.
*/
enum vpu_ipc_msg_type {
@@ -228,6 +290,23 @@ enum vpu_ipc_msg_type {
* deallocated or reassigned to another context.
*/
VPU_JSM_MSG_HWS_REGISTER_DB = 0x1117,
+ /** Control command: Log buffer setting */
+ VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG = 0x1118,
+ /* Control command: Suspend command queue. */
+ VPU_JSM_MSG_HWS_SUSPEND_CMDQ = 0x1119,
+ /* Control command: Resume command queue */
+ VPU_JSM_MSG_HWS_RESUME_CMDQ = 0x111a,
+ /* Control command: Resume engine after reset */
+ VPU_JSM_MSG_HWS_ENGINE_RESUME = 0x111b,
+ /* Control command: Enable survivability/DCT mode */
+ VPU_JSM_MSG_DCT_ENABLE = 0x111c,
+ /* Control command: Disable survivability/DCT mode */
+ VPU_JSM_MSG_DCT_DISABLE = 0x111d,
+ /**
+ * Dump VPU state. To be used for debug purposes only.
+ * NOTE: Please introduce new ASYNC commands before this one.
+ */
+ VPU_JSM_MSG_STATE_DUMP = 0x11FF,
/* IPC Host -> Device, General commands */
VPU_JSM_MSG_GENERAL_CMD = 0x1200,
VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD,
@@ -236,6 +315,10 @@ enum vpu_ipc_msg_type {
* Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`.
*/
VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201,
+ /**
+ * Perform the save procedure for the D0i3 entry
+ */
+ VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202,
/* IPC Device -> Host, Job completion */
VPU_JSM_MSG_JOB_DONE = 0x2100,
/* IPC Device -> Host, Async command completion */
@@ -304,11 +387,35 @@ enum vpu_ipc_msg_type {
VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP = 0x2216,
/** Response to control command: Set context scheduling properties */
VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP = 0x2217,
+ /** Response to control command: Log buffer setting */
+ VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP = 0x2218,
+ /* IPC Device -> Host, HWS notify index entry of log buffer written */
+ VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION = 0x2219,
+ /* IPC Device -> Host, HWS completion of a context suspend request */
+ VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE = 0x221a,
+ /* Response to control command: Resume command queue */
+ VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP = 0x221b,
+ /* Response to control command: Resume engine command response */
+ VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE = 0x221c,
+ /* Response to control command: Enable survivability/DCT mode */
+ VPU_JSM_MSG_DCT_ENABLE_DONE = 0x221d,
+ /* Response to control command: Disable survivability/DCT mode */
+ VPU_JSM_MSG_DCT_DISABLE_DONE = 0x221e,
+ /**
+ * Response to state dump control command.
+ * NOTE: Please introduce new ASYNC responses before this one.
+ */
+ VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF,
/* IPC Device -> Host, General command completion */
VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300,
VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE,
/** Response to VPU_JSM_MSG_DYNDBG_CONTROL. */
VPU_JSM_MSG_DYNDBG_CONTROL_RSP = 0x2301,
+ /**
+ * Acknowledgment of completion of the save procedure initiated by
+ * VPU_JSM_MSG_PWR_D0I3_ENTER
+ */
+ VPU_JSM_MSG_PWR_D0I3_ENTER_DONE = 0x2302,
};
enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED };
@@ -593,12 +700,12 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
* Default quantum in 100ns units for scheduling across processes
* within a priority band
*/
- u64 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
+ u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
/*
* Default grace period in 100ns units for processes that preempt each
* other within a priority band
*/
- u64 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
+ u32 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
/*
* For normal priority band, specifies the target VPU percentage
* in situations when it's starved by the focus band.
@@ -608,32 +715,51 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
u32 reserved_0;
};
-/* HWS create command queue request */
+/*
+ * @brief HWS create command queue request.
+ * Host will create a command queue via this command.
+ * Note: Cmdq group is a handle of an object which
+ * may contain one or more command queues.
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP
+ */
struct vpu_ipc_msg_payload_hws_create_cmdq {
/* Process id */
u64 process_id;
/* Host SSID */
u32 host_ssid;
- /* Zero Padding */
- u32 reserved;
+ /* Engine for which queue is being created */
+ u32 engine_idx;
+ /*
+ * Cmdq group may be set to 0 or equal to
+ * cmdq_id while each priority band contains
+ * only single engine instances.
+ */
+ u64 cmdq_group;
/* Command queue id */
u64 cmdq_id;
/* Command queue base */
u64 cmdq_base;
/* Command queue size */
u32 cmdq_size;
- /* Reserved */
+ /* Zero padding */
u32 reserved_0;
};
-/* HWS create command queue response */
+/*
+ * @brief HWS create command queue response.
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP
+ */
struct vpu_ipc_msg_payload_hws_create_cmdq_rsp {
/* Process id */
u64 process_id;
/* Host SSID */
u32 host_ssid;
- /* Zero Padding */
- u32 reserved;
+ /* Engine for which queue is being created */
+ u32 engine_idx;
+ /* Command queue group */
+ u64 cmdq_group;
/* Command queue id */
u64 cmdq_id;
};
@@ -661,7 +787,7 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
/* Inside realtime band assigns a further priority */
u32 realtime_priority_level;
/* Priority relative to other contexts in the same process */
- u32 in_process_priority;
+ s32 in_process_priority;
/* Zero padding / Reserved */
u32 reserved_1;
/* Context quantum relative to other contexts of same priority in the same process */
@@ -694,6 +820,123 @@ struct vpu_jsm_hws_register_db {
u64 cmdq_size;
};
+/*
+ * @brief Structure to set another buffer to be used for scheduling-related logging.
+ * The size of the logging buffer and the number of entries is defined as part of the
+ * buffer itself as described next.
+ * The log buffer received from the host is made up of:
+ * - header: 32 bytes in size, as shown in 'struct vpu_hws_log_buffer_header'.
+ * The header contains the number of log entries in the buffer.
+ * - log entry: 0 to n-1, each log entry is 32 bytes in size, as shown in
+ * 'struct vpu_hws_log_buffer_entry'.
+ * The entry contains the VPU timestamp, operation type and data.
+ * The host should provide the notify index value of log buffer to VPU. This is a
+ * value defined within the log buffer and when written to will generate the
+ * scheduling log notification.
+ * The host should set engine_idx and vpu_log_buffer_va to 0 to disable logging
+ * for a particular engine.
+ * VPU will handle one log buffer for each of supported engines.
+ * VPU should allow the logging to consume one host_ssid.
+ * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG
+ * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP
+ * @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
+ */
+struct vpu_ipc_msg_payload_hws_set_scheduling_log {
+ /* Engine ordinal */
+ u32 engine_idx;
+ /* Host SSID */
+ u32 host_ssid;
+ /*
+ * VPU log buffer virtual address.
+ * Set to 0 to disable logging for this engine.
+ */
+ u64 vpu_log_buffer_va;
+ /*
+ * Notify index of log buffer. VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
+ * is generated when an event log is written to this index.
+ */
+ u64 notify_index;
+};
+
+/*
+ * @brief The scheduling log notification is generated by VPU when it writes
+ * an event into the log buffer at the notify_index. VPU notifies host with
+ * VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION. This is an asynchronous
+ * message from VPU to host.
+ * @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
+ * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG
+ */
+struct vpu_ipc_msg_payload_hws_scheduling_log_notification {
+ /* Engine ordinal */
+ u32 engine_idx;
+ /* Zero Padding */
+ u32 reserved_0;
+};
+
+/*
+ * @brief HWS suspend command queue request and done structure.
+ * Host will request the suspend of contexts and VPU will:
+ * - Suspend all work on this context
+ * - Preempt any running work
+ * - Asynchronously perform the above and return success immediately once
+ * all items above are started successfully
+ * - Notify the host of completion of these operations via
+ * VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE
+ * - Reject any other context operations on a context with an in-flight
+ * suspend request running
+ * Same structure used when VPU notifies host of completion of a context suspend
+ * request. The ids and suspend fence value reported in this command will match
+ * the one in the request from the host to suspend the context. Once suspend is
+ * complete, VPU will not access any data relating to this command queue until
+ * it is resumed.
+ * @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ
+ * @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE
+ */
+struct vpu_ipc_msg_payload_hws_suspend_cmdq {
+ /* Host SSID */
+ u32 host_ssid;
+ /* Zero Padding */
+ u32 reserved_0;
+ /* Command queue id */
+ u64 cmdq_id;
+ /*
+ * Suspend fence value - reported back by the VPU in
+ * VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE once suspend is complete.
+ */
+ u64 suspend_fence_value;
+};
+
+/*
+ * @brief HWS Resume command queue request / response structure.
+ * Host will request the resume of a context:
+ * - VPU will resume all work on this context
+ * - Scheduler will allow this context to be scheduled
+ * @see VPU_JSM_MSG_HWS_RESUME_CMDQ
+ * @see VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP
+ */
+struct vpu_ipc_msg_payload_hws_resume_cmdq {
+ /* Host SSID */
+ u32 host_ssid;
+ /* Zero Padding */
+ u32 reserved_0;
+ /* Command queue id */
+ u64 cmdq_id;
+};
+
+/*
+ * @brief HWS Resume engine request / response structure.
+ * After a HWS engine reset, all scheduling is stopped on VPU until an engine resume.
+ * Host shall send this command to resume scheduling of any valid queue.
+ * @see VPU_JSM_MSG_HWS_ENGINE_RESUME
+ * @see VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE
+ */
+struct vpu_ipc_msg_payload_hws_resume_engine {
+ /* Engine to be resumed */
+ u32 engine_idx;
+ /* Reserved */
+ u32 reserved_0;
+};
+
/**
* Payload for VPU_JSM_MSG_TRACE_SET_CONFIG[_RSP] and
* VPU_JSM_MSG_TRACE_GET_CONFIG_RSP messages.
@@ -938,6 +1181,35 @@ struct vpu_ipc_msg_payload_dyndbg_control {
char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN];
};
+/**
+ * Payload for VPU_JSM_MSG_PWR_D0I3_ENTER
+ *
+ * This is a bi-directional payload.
+ */
+struct vpu_ipc_msg_payload_pwr_d0i3_enter {
+ /**
+ * 0: VPU_JSM_MSG_PWR_D0I3_ENTER_DONE is not sent to the host driver
+ * The driver will poll for D0i2 Idle state transitions.
+ * 1: VPU_JSM_MSG_PWR_D0I3_ENTER_DONE is sent after VPU state save is complete
+ */
+ u32 send_response;
+ u32 reserved_0;
+};
+
+/**
+ * Payload for VPU_JSM_MSG_DCT_ENABLE message.
+ *
+ * Default values for DCT active/inactive times are 5.3ms and 30ms respectively,
+ * corresponding to an 85% duty cycle. This payload allows the host to tune these
+ * values according to application requirements.
+ */
+struct vpu_ipc_msg_payload_pwr_dct_control {
+ /** Duty cycle active time in microseconds */
+ u32 dct_active_us;
+ /** Duty cycle inactive time in microseconds */
+ u32 dct_inactive_us;
+};
+
/*
* Payloads union, used to define complete message format.
*/
@@ -974,6 +1246,13 @@ union vpu_ipc_msg_payload {
struct vpu_ipc_msg_payload_hws_destroy_cmdq hws_destroy_cmdq;
struct vpu_ipc_msg_payload_hws_set_context_sched_properties
hws_set_context_sched_properties;
+ struct vpu_ipc_msg_payload_hws_set_scheduling_log hws_set_scheduling_log;
+ struct vpu_ipc_msg_payload_hws_scheduling_log_notification hws_scheduling_log_notification;
+ struct vpu_ipc_msg_payload_hws_suspend_cmdq hws_suspend_cmdq;
+ struct vpu_ipc_msg_payload_hws_resume_cmdq hws_resume_cmdq;
+ struct vpu_ipc_msg_payload_hws_resume_engine hws_resume_engine;
+ struct vpu_ipc_msg_payload_pwr_d0i3_enter pwr_d0i3_enter;
+ struct vpu_ipc_msg_payload_pwr_dct_control pwr_dct_control;
};
/*