summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorHarish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>2016-09-02 15:01:41 -0400
committerHarish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>2016-09-21 18:01:47 -0400
commit949cfecd0d080873eb036af9c534c4dd13596080 (patch)
treebc71c34ffa341e01ac69f68335124d4406dd837b /drivers
parente85bac90f6e26119fec89031f998985b5c23433b (diff)
drm/amdkfd: Add kgd2kfd schedule_evict_and_restore_process
Change-Id: I27860af58c54449a9ba1fc0a04e0436edb7fae8b Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c116
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c2
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h6
5 files changed, 136 insertions, 1 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 0ce957253e80..6acc5fc260bc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -26,6 +26,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/debugfs.h>
+#include <linux/fence.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers.h"
@@ -745,6 +746,42 @@ int kgd2kfd_resume_mm(struct kfd_dev *kfd, struct mm_struct *mm)
return r;
}
+/* quiesce_process_mm -
+ * Quiesce all user queues that belongs to given process p
+ */
+static int quiesce_process_mm(struct kfd_process *p)
+{
+ struct kfd_process_device *pdd;
+ int r = 0;
+ unsigned int n_evicted = 0;
+
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+ r = process_evict_queues(pdd->dev->dqm, &pdd->qpd);
+ if (r != 0) {
+ pr_err("Failed to evict process queues\n");
+ goto fail;
+ }
+ n_evicted++;
+ }
+
+ return r;
+
+fail:
+ /* To keep state consistent, roll back partial eviction by
+ * restoring queues
+ */
+ list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+ if (n_evicted == 0)
+ break;
+ if (process_restore_queues(pdd->dev->dqm, &pdd->qpd))
+ pr_err("Failed to restore queues\n");
+
+ n_evicted--;
+ }
+
+ return r;
+}
+
/* resume_process_mm -
* Resume all user queues that belongs to given process p. The caller must
* ensure that process p context is valid.
@@ -827,6 +864,85 @@ void kfd_restore_bo_worker(struct work_struct *work)
pr_err("Failed to resume user queues\n");
}
+/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
+ * prepare for safe eviction of KFD BOs that belong to the specified
+ * process.
+ *
+ * @mm: mm_struct that identifies the specified KFD process
+ * @fence: eviction fence attached to KFD process BOs
+ *
+ */
+int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
+ struct fence *fence)
+{
+ struct kfd_process *p;
+
+ if (!fence)
+ return -EINVAL;
+
+ if (fence_is_signaled(fence))
+ return 0;
+
+ p = kfd_lookup_process_by_mm(mm);
+ if (!p)
+ return -ENODEV;
+
+ if (work_pending(&p->eviction_work.work)) {
+ /* It is possible has TTM has lined up couple of BOs of the same
+ * process to be evicted. Check if the fence is same which
+ * indicates that previous work item scheduled is not complted
+ */
+ if (p->eviction_work.eviction_fence == fence)
+ goto out;
+ else {
+ WARN(1, "Starting new evict with previous evict is not completed\n");
+ cancel_work_sync(&p->eviction_work.work);
+ }
+ }
+
+ /* During process initialization eviction_work.work is initialized
+ * to kfd_evict_bo_worker
+ */
+ p->eviction_work.eviction_fence = fence_get(fence);
+ schedule_work(&p->eviction_work.work);
+out:
+ kfd_unref_process(p);
+ return 0;
+}
+
+void kfd_evict_bo_worker(struct work_struct *work)
+{
+ int ret;
+ struct kfd_process *p;
+ struct kfd_eviction_work *eviction_work;
+
+ eviction_work = container_of(work, struct kfd_eviction_work,
+ work);
+
+ /* Process termination destroys this worker thread. So during the
+ * lifetime of this thread, kfd_process p will be valid
+ */
+ p = container_of(eviction_work, struct kfd_process, eviction_work);
+
+ /* Narrow window of overlap between restore and evict work item is
+ * possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos unreserves
+ * KFD BOs, it is possible to evicted again. But restore has few more
+ * steps of finish. So lets wait for the restore work to complete
+ */
+ if (delayed_work_pending(&p->restore_work))
+ flush_delayed_work(&p->restore_work);
+
+ ret = quiesce_process_mm(p);
+ if (!ret) {
+ fence_signal(eviction_work->eviction_fence);
+ fence_put(eviction_work->eviction_fence);
+ kfd_schedule_restore_bos_and_queues(p);
+ } else {
+ pr_err("Failed to quiesce user queues. Cannot evict BOs\n");
+ }
+
+}
+
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size)
{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 914a0cd04cfc..42c559bd76da 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -47,6 +47,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
.restore = kgd2kfd_restore,
.quiesce_mm = kgd2kfd_quiesce_mm,
.resume_mm = kgd2kfd_resume_mm,
+ .schedule_evict_and_restore_process =
+ kgd2kfd_schedule_evict_and_restore_process,
};
int sched_policy = KFD_SCHED_POLICY_HWS;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index b3f26656f8bb..9dea8f210fc1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -542,12 +542,20 @@ struct qcm_process_device {
};
/* KFD Memory Eviction */
+struct kfd_eviction_work {
+ struct work_struct work;
+ struct fence *eviction_fence;
+};
+
/* Appox. wait time before attempting to restore evicted BOs */
#define PROCESS_RESTORE_TIME_MS 2000
/* Approx. back off time if restore fails due to lack of memory */
#define PROCESS_BACK_OFF_TIME_MS 1000
+void kfd_evict_bo_worker(struct work_struct *work);
void kfd_restore_bo_worker(struct work_struct *work);
+int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
+ struct fence *fence);
/*8 byte handle containing GPU ID in the most significant 4 bytes and
@@ -681,7 +689,8 @@ struct kfd_process {
void *master_vm;
- /* For restoring BOs after eviction */
+ /* Work items for evicting and restoring BOs */
+ struct kfd_eviction_work eviction_work;
struct delayed_work restore_work;
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 3121061d0ed6..d7a69e3f8b5b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -509,6 +509,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
p = container_of(mn, struct kfd_process, mmu_notifier);
BUG_ON(p->mm != mm);
+ cancel_work_sync(&p->eviction_work.work);
cancel_delayed_work_sync(&p->restore_work);
mutex_lock(&kfd_processes_mutex);
@@ -668,6 +669,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
if (err)
goto err_init_cwsr;
+ INIT_WORK(&process->eviction_work.work, kfd_evict_bo_worker);
INIT_DELAYED_WORK(&process->restore_work, kfd_restore_bo_worker);
return process;
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 25a47578bc21..b62b1eefa4d8 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -31,6 +31,7 @@
#include <linux/types.h>
#include <linux/mm_types.h>
#include <linux/scatterlist.h>
+#include <linux/fence.h>
struct pci_dev;
@@ -384,6 +385,9 @@ struct kfd2kgd_calls {
*
* @resume_mm: Resume user queue access to specified MM address space
*
+ * @schedule_evict_and_restore_process: Schedules work queue that will prepare
+ * for safe eviction of KFD BOs that belong to the specified process.
+ *
* This structure contains function callback pointers so the kgd driver
* will notify to the amdkfd about certain status changes.
*
@@ -402,6 +406,8 @@ struct kgd2kfd_calls {
int (*restore)(struct kfd_dev *kfd);
int (*quiesce_mm)(struct kfd_dev *kfd, struct mm_struct *mm);
int (*resume_mm)(struct kfd_dev *kfd, struct mm_struct *mm);
+ int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
+ struct fence *fence);
};
int kgd2kfd_init(unsigned interface_version,