accel/ivpu: Add command buffer submission logic

Each of the user contexts has two command queues, one for compute engine and one for the copy engine. Command queues are allocated and registered in the device when the first job (command buffer) is submitted from the user space to the VPU device. The userspace provides a list of GEM buffer object handles to submit to the VPU, the driver resolves buffer handles, pins physical memory if needed, increments ref count for each buffer and stores pointers to buffer objects in the ivpu_job objects that track jobs submitted to the device. The VPU signals job completion with an asynchronous message that contains the job id passed to firmware when the job was submitted. Currently, the driver supports simple scheduling logic where jobs submitted from user space are immediately pushed to the VPU device command queues. In the future, it will be extended to use hardware base scheduling and/or drm_sched. Co-developed-by: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com> Signed-off-by: Andrzej Kacprowski <andrzej.kacprowski@linux.intel.com> Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: https://patchwork.freedesktop.org/patch/msgid/20230117092723.60441-7-jacek.lawrynowicz@linux.intel.com
author: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> 2023-01-17 10:27:22 +0100
committer: Daniel Vetter <daniel.vetter@ffwll.ch> 2023-01-19 11:12:00 +0100
commit: cd7272215c44676dba236491941c6c406701cc5e (patch)
tree: e97a05cd4d479ebaaad330dfdb051feea8231a5e /drivers
parent: 02d5b0aacd0590dbaf25f35834631e5bc11002e3 (diff)
7 files changed, 732 insertions, 4 deletions
diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile
index 9fa6a76e9d79..e61ece53a9ae 100644
--- a/drivers/accel/ivpu/Makefile
+++ b/drivers/accel/ivpu/Makefile
@@ -7,6 +7,7 @@ intel_vpu-y := \
 	ivpu_gem.o \
 	ivpu_hw_mtl.o \
 	ivpu_ipc.o \
+	ivpu_job.o \
 	ivpu_jsm_msg.o \
 	ivpu_mmu.o \
 	ivpu_mmu_context.o
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 0b43e880c99c..c893c8ac6bed 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -20,6 +20,7 @@
 #include "ivpu_gem.h"
 #include "ivpu_hw.h"
 #include "ivpu_ipc.h"
+#include "ivpu_job.h"
 #include "ivpu_jsm_msg.h"
 #include "ivpu_mmu.h"
 #include "ivpu_mmu_context.h"
@@ -31,6 +32,8 @@
 
 static const struct drm_driver driver;
 
+static struct lock_class_key submitted_jobs_xa_lock_class_key;
+
 int ivpu_dbg_mask;
 module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644);
 MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
@@ -84,8 +87,11 @@ static void file_priv_release(struct kref *ref)
 
 	ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id);
 
+	ivpu_cmdq_release_all(file_priv);
+	ivpu_bo_remove_all_bos_from_context(&file_priv->ctx);
 	ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
 	drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv);
+	mutex_destroy(&file_priv->lock);
 	kfree(file_priv);
 }
 
@@ -209,10 +215,11 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
 	file_priv->vdev = vdev;
 	file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL;
 	kref_init(&file_priv->ref);
+	mutex_init(&file_priv->lock);
 
 	ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id);
 	if (ret)
-		goto err_free_file_priv;
+		goto err_mutex_destroy;
 
 	old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL);
 	if (xa_is_err(old)) {
@@ -229,7 +236,8 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file)
 
 err_ctx_fini:
 	ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
-err_free_file_priv:
+err_mutex_destroy:
+	mutex_destroy(&file_priv->lock);
 	kfree(file_priv);
 err_xa_erase:
 	xa_erase_irq(&vdev->context_xa, ctx_id);
@@ -252,6 +260,8 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(IVPU_SET_PARAM, ivpu_set_param_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE, ivpu_bo_create_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0),
 };
 
 static int ivpu_wait_for_ready(struct ivpu_device *vdev)
@@ -458,6 +468,8 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 	vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT;
 	atomic64_set(&vdev->unique_id_counter, 0);
 	xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
+	xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
+	lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
 
 	ret = ivpu_pci_init(vdev);
 	if (ret) {
@@ -509,20 +521,30 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
 		goto err_fw_fini;
 	}
 
+	ret = ivpu_job_done_thread_init(vdev);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret);
+		goto err_ipc_fini;
+	}
+
 	ret = ivpu_fw_load(vdev);
 	if (ret) {
 		ivpu_err(vdev, "Failed to load firmware: %d\n", ret);
-		goto err_fw_fini;
+		goto err_job_done_thread_fini;
 	}
 
 	ret = ivpu_boot(vdev);
 	if (ret) {
 		ivpu_err(vdev, "Failed to boot: %d\n", ret);
-		goto err_fw_fini;
+		goto err_job_done_thread_fini;
 	}
 
 	return 0;
 
+err_job_done_thread_fini:
+	ivpu_job_done_thread_fini(vdev);
+err_ipc_fini:
+	ivpu_ipc_fini(vdev);
 err_fw_fini:
 	ivpu_fw_fini(vdev);
 err_mmu_gctx_fini:
@@ -530,6 +552,7 @@ err_mmu_gctx_fini:
 err_power_down:
 	ivpu_hw_power_down(vdev);
 err_xa_destroy:
+	xa_destroy(&vdev->submitted_jobs_xa);
 	xa_destroy(&vdev->context_xa);
 	return ret;
 }
@@ -537,10 +560,13 @@ err_xa_destroy:
 static void ivpu_dev_fini(struct ivpu_device *vdev)
 {
 	ivpu_shutdown(vdev);
+	ivpu_job_done_thread_fini(vdev);
 	ivpu_ipc_fini(vdev);
 	ivpu_fw_fini(vdev);
 	ivpu_mmu_global_context_fini(vdev);
 
+	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa));
+	xa_destroy(&vdev->submitted_jobs_xa);
 	drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa));
 	xa_destroy(&vdev->context_xa);
 }
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 4f2c4268f1dd..5e63c70c47b3 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -94,6 +94,9 @@ struct ivpu_device {
 	struct xarray context_xa;
 	struct xa_limit context_xa_limit;
 
+	struct xarray submitted_jobs_xa;
+	struct task_struct *job_done_thread;
+
 	atomic64_t unique_id_counter;
 
 	struct {
@@ -111,6 +114,8 @@ struct ivpu_device {
 struct ivpu_file_priv {
 	struct kref ref;
 	struct ivpu_device *vdev;
+	struct mutex lock; /* Protects cmdq */
+	struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES];
 	struct ivpu_mmu_context ctx;
 	u32 priority;
 	bool has_mmu_faults;
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index b938359b19af..d1f923971b4c 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -678,6 +678,32 @@ unlock:
 	return ret;
 }
 
+int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_ivpu_bo_wait *args = data;
+	struct drm_gem_object *obj;
+	unsigned long timeout;
+	long ret;
+
+	timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+
+	obj = drm_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -EINVAL;
+
+	ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ, true, timeout);
+	if (ret == 0) {
+		ret = -ETIMEDOUT;
+	} else if (ret > 0) {
+		ret = 0;
+		args->job_status = to_ivpu_bo(obj)->job_status;
+	}
+
+	drm_gem_object_put(obj);
+
+	return ret;
+}
+
 static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
 {
 	unsigned long dma_refcount = 0;
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index 1891c90702c2..6b0ceda5f253 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -30,6 +30,7 @@ struct ivpu_bo {
 	u32 handle;
 	u32 flags;
 	uintptr_t user_ptr;
+	u32 job_status;
 };
 
 enum ivpu_bo_type {
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
new file mode 100644
index 000000000000..23cdb3b3f1f3
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -0,0 +1,602 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#include <drm/drm_file.h>
+
+#include <linux/bitfield.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <uapi/drm/ivpu_accel.h>
+
+#include "ivpu_drv.h"
+#include "ivpu_hw.h"
+#include "ivpu_ipc.h"
+#include "ivpu_job.h"
+#include "ivpu_jsm_msg.h"
+
+#define CMD_BUF_IDX	     0
+#define JOB_ID_JOB_MASK	     GENMASK(7, 0)
+#define JOB_ID_CONTEXT_MASK  GENMASK(31, 8)
+#define JOB_MAX_BUFFER_COUNT 65535
+
+static unsigned int ivpu_tdr_timeout_ms;
+module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644);
+MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
+
+static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
+{
+	ivpu_hw_reg_db_set(vdev, cmdq->db_id);
+}
+
+static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine)
+{
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct vpu_job_queue_header *jobq_header;
+	struct ivpu_cmdq *cmdq;
+
+	cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL);
+	if (!cmdq)
+		return NULL;
+
+	cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC);
+	if (!cmdq->mem)
+		goto cmdq_free;
+
+	cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev);
+	cmdq->entry_count = (u32)((cmdq->mem->base.size - sizeof(struct vpu_job_queue_header)) /
+				  sizeof(struct vpu_job_queue_entry));
+
+	cmdq->jobq = (struct vpu_job_queue *)cmdq->mem->kvaddr;
+	jobq_header = &cmdq->jobq->header;
+	jobq_header->engine_idx = engine;
+	jobq_header->head = 0;
+	jobq_header->tail = 0;
+	wmb(); /* Flush WC buffer for jobq->header */
+
+	return cmdq;
+
+cmdq_free:
+	kfree(cmdq);
+	return NULL;
+}
+
+static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
+{
+	if (!cmdq)
+		return;
+
+	ivpu_bo_free_internal(cmdq->mem);
+	kfree(cmdq);
+}
+
+static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine)
+{
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct ivpu_cmdq *cmdq = file_priv->cmdq[engine];
+	int ret;
+
+	lockdep_assert_held(&file_priv->lock);
+
+	if (!cmdq) {
+		cmdq = ivpu_cmdq_alloc(file_priv, engine);
+		if (!cmdq)
+			return NULL;
+		file_priv->cmdq[engine] = cmdq;
+	}
+
+	if (cmdq->db_registered)
+		return cmdq;
+
+	ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id,
+				   cmdq->mem->vpu_addr, cmdq->mem->base.size);
+	if (ret)
+		return NULL;
+
+	cmdq->db_registered = true;
+
+	return cmdq;
+}
+
+static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine)
+{
+	struct ivpu_cmdq *cmdq = file_priv->cmdq[engine];
+
+	lockdep_assert_held(&file_priv->lock);
+
+	if (cmdq) {
+		file_priv->cmdq[engine] = NULL;
+		if (cmdq->db_registered)
+			ivpu_jsm_unregister_db(file_priv->vdev, cmdq->db_id);
+
+		ivpu_cmdq_free(file_priv, cmdq);
+	}
+}
+
+void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv)
+{
+	int i;
+
+	mutex_lock(&file_priv->lock);
+
+	for (i = 0; i < IVPU_NUM_ENGINES; i++)
+		ivpu_cmdq_release_locked(file_priv, i);
+
+	mutex_unlock(&file_priv->lock);
+}
+
+/*
+ * Mark the doorbell as unregistered and reset job queue pointers.
+ * This function needs to be called when the VPU hardware is restarted
+ * and FW looses job queue state. The next time job queue is used it
+ * will be registered again.
+ */
+static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine)
+{
+	struct ivpu_cmdq *cmdq = file_priv->cmdq[engine];
+
+	lockdep_assert_held(&file_priv->lock);
+
+	if (cmdq) {
+		cmdq->db_registered = false;
+		cmdq->jobq->header.head = 0;
+		cmdq->jobq->header.tail = 0;
+		wmb(); /* Flush WC buffer for jobq header */
+	}
+}
+
+static void ivpu_cmdq_reset_all(struct ivpu_file_priv *file_priv)
+{
+	int i;
+
+	mutex_lock(&file_priv->lock);
+
+	for (i = 0; i < IVPU_NUM_ENGINES; i++)
+		ivpu_cmdq_reset_locked(file_priv, i);
+
+	mutex_unlock(&file_priv->lock);
+}
+
+void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev)
+{
+	struct ivpu_file_priv *file_priv;
+	unsigned long ctx_id;
+
+	xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
+		file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
+		if (!file_priv)
+			continue;
+
+		ivpu_cmdq_reset_all(file_priv);
+
+		ivpu_file_priv_put(&file_priv);
+	}
+}
+
+static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
+{
+	struct ivpu_device *vdev = job->vdev;
+	struct vpu_job_queue_header *header = &cmdq->jobq->header;
+	struct vpu_job_queue_entry *entry;
+	u32 tail = READ_ONCE(header->tail);
+	u32 next_entry = (tail + 1) % cmdq->entry_count;
+
+	/* Check if there is space left in job queue */
+	if (next_entry == header->head) {
+		ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n",
+			 job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail);
+		return -EBUSY;
+	}
+
+	entry = &cmdq->jobq->job[tail];
+	entry->batch_buf_addr = job->cmd_buf_vpu_addr;
+	entry->job_id = job->job_id;
+	entry->flags = 0;
+	wmb(); /* Ensure that tail is updated after filling entry */
+	header->tail = next_entry;
+	wmb(); /* Flush WC buffer for jobq header */
+
+	return 0;
+}
+
+struct ivpu_fence {
+	struct dma_fence base;
+	spinlock_t lock; /* protects base */
+	struct ivpu_device *vdev;
+};
+
+static inline struct ivpu_fence *to_vpu_fence(struct dma_fence *fence)
+{
+	return container_of(fence, struct ivpu_fence, base);
+}
+
+static const char *ivpu_fence_get_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *ivpu_fence_get_timeline_name(struct dma_fence *fence)
+{
+	struct ivpu_fence *ivpu_fence = to_vpu_fence(fence);
+
+	return dev_name(ivpu_fence->vdev->drm.dev);
+}
+
+static const struct dma_fence_ops ivpu_fence_ops = {
+	.get_driver_name = ivpu_fence_get_driver_name,
+	.get_timeline_name = ivpu_fence_get_timeline_name,
+};
+
+static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev)
+{
+	struct ivpu_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	fence->vdev = vdev;
+	spin_lock_init(&fence->lock);
+	dma_fence_init(&fence->base, &ivpu_fence_ops, &fence->lock, dma_fence_context_alloc(1), 1);
+
+	return &fence->base;
+}
+
+static void job_get(struct ivpu_job *job, struct ivpu_job **link)
+{
+	struct ivpu_device *vdev = job->vdev;
+
+	kref_get(&job->ref);
+	*link = job;
+
+	ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref));
+}
+
+static void job_release(struct kref *ref)
+{
+	struct ivpu_job *job = container_of(ref, struct ivpu_job, ref);
+	struct ivpu_device *vdev = job->vdev;
+	u32 i;
+
+	for (i = 0; i < job->bo_count; i++)
+		if (job->bos[i])
+			drm_gem_object_put(&job->bos[i]->base);
+
+	dma_fence_put(job->done_fence);
+	ivpu_file_priv_put(&job->file_priv);
+
+	ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id);
+	kfree(job);
+}
+
+static void job_put(struct ivpu_job *job)
+{
+	struct ivpu_device *vdev = job->vdev;
+
+	ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref));
+	kref_put(&job->ref, job_release);
+}
+
+static struct ivpu_job *
+ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count)
+{
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct ivpu_job *job;
+	size_t buf_size;
+
+	buf_size = sizeof(*job) + bo_count * sizeof(struct ivpu_bo *);
+	job = kzalloc(buf_size, GFP_KERNEL);
+	if (!job)
+		return NULL;
+
+	kref_init(&job->ref);
+
+	job->vdev = vdev;
+	job->engine_idx = engine_idx;
+	job->bo_count = bo_count;
+	job->done_fence = ivpu_fence_create(vdev);
+	if (!job->done_fence) {
+		ivpu_warn_ratelimited(vdev, "Failed to create a fence\n");
+		goto err_free_job;
+	}
+
+	job->file_priv = ivpu_file_priv_get(file_priv);
+
+	ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx);
+
+	return job;
+
+err_free_job:
+	kfree(job);
+	return NULL;
+}
+
+static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
+{
+	struct ivpu_job *job;
+
+	job = xa_erase(&vdev->submitted_jobs_xa, job_id);
+	if (!job)
+		return -ENOENT;
+
+	if (job->file_priv->has_mmu_faults)
+		job_status = VPU_JSM_STATUS_ABORTED;
+
+	job->bos[CMD_BUF_IDX]->job_status = job_status;
+	dma_fence_signal(job->done_fence);
+
+	ivpu_dbg(vdev, JOB, "Job complete:  id %3u ctx %2d engine %d status 0x%x\n",
+		 job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
+
+	job_put(job);
+	return 0;
+}
+
+static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg)
+{
+	struct vpu_ipc_msg_payload_job_done *payload;
+	struct vpu_jsm_msg *job_ret_msg = msg;
+	int ret;
+
+	payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload;
+
+	ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
+	if (ret)
+		ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret);
+}
+
+void ivpu_jobs_abort_all(struct ivpu_device *vdev)
+{
+	struct ivpu_job *job;
+	unsigned long id;
+
+	xa_for_each(&vdev->submitted_jobs_xa, id, job)
+		ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED);
+}
+
+static int ivpu_direct_job_submission(struct ivpu_job *job)
+{
+	struct ivpu_file_priv *file_priv = job->file_priv;
+	struct ivpu_device *vdev = job->vdev;
+	struct xa_limit job_id_range;
+	struct ivpu_cmdq *cmdq;
+	int ret;
+
+	mutex_lock(&file_priv->lock);
+
+	cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx);
+	if (!cmdq) {
+		ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n",
+			  file_priv->ctx.id, job->engine_idx);
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1));
+	job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK;
+
+	job_get(job, &job);
+	ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL);
+	if (ret) {
+		ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret);
+		goto err_job_put;
+	}
+
+	ret = ivpu_cmdq_push_job(cmdq, job);
+	if (ret)
+		goto err_xa_erase;
+
+	ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d next %d\n",
+		 job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->jobq->header.tail);
+
+	if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) {
+		ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
+		cmdq->jobq->header.head = cmdq->jobq->header.tail;
+		wmb(); /* Flush WC buffer for jobq header */
+	} else {
+		ivpu_cmdq_ring_db(vdev, cmdq);
+	}
+
+	mutex_unlock(&file_priv->lock);
+	return 0;
+
+err_xa_erase:
+	xa_erase(&vdev->submitted_jobs_xa, job->job_id);
+err_job_put:
+	job_put(job);
+err_unlock:
+	mutex_unlock(&file_priv->lock);
+	return ret;
+}
+
+static int
+ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles,
+				u32 buf_count, u32 commands_offset)
+{
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct ww_acquire_ctx acquire_ctx;
+	struct ivpu_bo *bo;
+	int ret;
+	u32 i;
+
+	for (i = 0; i < buf_count; i++) {
+		struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]);
+
+		if (!obj)
+			return -ENOENT;
+
+		job->bos[i] = to_ivpu_bo(obj);
+
+		ret = ivpu_bo_pin(job->bos[i]);
+		if (ret)
+			return ret;
+	}
+
+	bo = job->bos[CMD_BUF_IDX];
+	if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) {
+		ivpu_warn(vdev, "Buffer is already in use\n");
+		return -EBUSY;
+	}
+
+	if (commands_offset >= bo->base.size) {
+		ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset);
+		return -EINVAL;
+	}
+
+	job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset;
+
+	ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count,
+					&acquire_ctx);
+	if (ret) {
+		ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret);
+		return ret;
+	}
+
+	for (i = 0; i < buf_count; i++) {
+		ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1);
+		if (ret) {
+			ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
+			goto unlock_reservations;
+		}
+	}
+
+	for (i = 0; i < buf_count; i++)
+		dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE);
+
+unlock_reservations:
+	drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx);
+
+	wmb(); /* Flush write combining buffers */
+
+	return ret;
+}
+
+int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	int ret = 0;
+	struct ivpu_file_priv *file_priv = file->driver_priv;
+	struct ivpu_device *vdev = file_priv->vdev;
+	struct drm_ivpu_submit *params = data;
+	struct ivpu_job *job;
+	u32 *buf_handles;
+
+	if (params->engine > DRM_IVPU_ENGINE_COPY)
+		return -EINVAL;
+
+	if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT)
+		return -EINVAL;
+
+	if (!IS_ALIGNED(params->commands_offset, 8))
+		return -EINVAL;
+
+	if (!file_priv->ctx.id)
+		return -EINVAL;
+
+	if (file_priv->has_mmu_faults)
+		return -EBADFD;
+
+	buf_handles = kcalloc(params->buffer_count, sizeof(u32), GFP_KERNEL);
+	if (!buf_handles)
+		return -ENOMEM;
+
+	ret = copy_from_user(buf_handles,
+			     (void __user *)params->buffers_ptr,
+			     params->buffer_count * sizeof(u32));
+	if (ret) {
+		ret = -EFAULT;
+		goto free_handles;
+	}
+
+	ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n",
+		 file_priv->ctx.id, params->buffer_count);
+
+	job = ivpu_create_job(file_priv, params->engine, params->buffer_count);
+	if (!job) {
+		ivpu_err(vdev, "Failed to create job\n");
+		ret = -ENOMEM;
+		goto free_handles;
+	}
+
+	ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count,
+					      params->commands_offset);
+	if (ret) {
+		ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret);
+		goto job_put;
+	}
+
+	ret = ivpu_direct_job_submission(job);
+	if (ret) {
+		dma_fence_signal(job->done_fence);
+		ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret);
+	}
+
+job_put:
+	job_put(job);
+free_handles:
+	kfree(buf_handles);
+
+	return ret;
+}
+
+static int ivpu_job_done_thread(void *arg)
+{
+	struct ivpu_device *vdev = (struct ivpu_device *)arg;
+	struct ivpu_ipc_consumer cons;
+	struct vpu_jsm_msg jsm_msg;
+	bool jobs_submitted;
+	unsigned int timeout;
+	int ret;
+
+	ivpu_dbg(vdev, JOB, "Started %s\n", __func__);
+
+	ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET);
+
+	while (!kthread_should_stop()) {
+		timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+		jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa);
+		ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout);
+		if (!ret) {
+			ivpu_job_done_message(vdev, &jsm_msg);
+		} else if (ret == -ETIMEDOUT) {
+			if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) {
+				ivpu_err(vdev, "TDR detected, timeout %d ms", timeout);
+				ivpu_hw_diagnose_failure(vdev);
+			}
+		}
+	}
+
+	ivpu_ipc_consumer_del(vdev, &cons);
+
+	ivpu_jobs_abort_all(vdev);
+
+	ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__);
+	return 0;
+}
+
+int ivpu_job_done_thread_init(struct ivpu_device *vdev)
+{
+	struct task_struct *thread;
+
+	thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread");
+	if (IS_ERR(thread)) {
+		ivpu_err(vdev, "Failed to start job completion thread\n");
+		return -EIO;
+	}
+
+	get_task_struct(thread);
+	wake_up_process(thread);
+
+	vdev->job_done_thread = thread;
+
+	return 0;
+}
+
+void ivpu_job_done_thread_fini(struct ivpu_device *vdev)
+{
+	kthread_stop(vdev->job_done_thread);
+	put_task_struct(vdev->job_done_thread);
+}
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
new file mode 100644
index 000000000000..aa1f0b9479b0
--- /dev/null
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020-2023 Intel Corporation
+ */
+
+#ifndef __IVPU_JOB_H__
+#define __IVPU_JOB_H__
+
+#include <linux/kref.h>
+#include <linux/idr.h>
+
+#include "ivpu_gem.h"
+
+struct ivpu_device;
+struct ivpu_file_priv;
+
+/**
+ * struct ivpu_cmdq - Object representing device queue used to send jobs.
+ * @jobq:	   Pointer to job queue memory shared with the device
+ * @mem:           Memory allocated for the job queue, shared with device
+ * @entry_count    Number of job entries in the queue
+ * @db_id:	   Doorbell assigned to this job queue
+ * @db_registered: True if doorbell is registered in device
+ */
+struct ivpu_cmdq {
+	struct vpu_job_queue *jobq;
+	struct ivpu_bo *mem;
+	u32 entry_count;
+	u32 db_id;
+	bool db_registered;
+};
+
+/**
+ * struct ivpu_job - KMD object that represents batchbuffer / DMA buffer.
+ * Each batch / DMA buffer is a job to be submitted and executed by the VPU FW.
+ * This is a unit of execution, and be tracked by the job_id for
+ * any status reporting from VPU FW through IPC JOB RET/DONE message.
+ * @file_priv:		  The client that submitted this job
+ * @job_id:		  Job ID for KMD tracking and job status reporting from VPU FW
+ * @status:		  Status of the Job from IPC JOB RET/DONE message
+ * @batch_buffer:	  CPU vaddr points to the batch buffer memory allocated for the job
+ * @submit_status_offset: Offset within batch buffer where job completion handler
+			  will update the job status
+ */
+struct ivpu_job {
+	struct kref ref;
+	struct ivpu_device *vdev;
+	struct ivpu_file_priv *file_priv;
+	struct dma_fence *done_fence;
+	u64 cmd_buf_vpu_addr;
+	u32 job_id;
+	u32 engine_idx;
+	size_t bo_count;
+	struct ivpu_bo *bos[];
+};
+
+int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+
+void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv);
+void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
+
+int ivpu_job_done_thread_init(struct ivpu_device *vdev);
+void ivpu_job_done_thread_fini(struct ivpu_device *vdev);
+
+void ivpu_jobs_abort_all(struct ivpu_device *vdev);
+
+#endif /* __IVPU_JOB_H__ */
author	Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>	2023-01-17 10:27:22 +0100
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2023-01-19 11:12:00 +0100
commit	cd7272215c44676dba236491941c6c406701cc5e (patch)
tree	e97a05cd4d479ebaaad330dfdb051feea8231a5e /drivers
parent	02d5b0aacd0590dbaf25f35834631e5bc11002e3 (diff)